LCOV - code coverage report
Current view: top level - src/grid - grid_task_list.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:ccc2433) Lines: 132 183 72.1 %
Date: 2024-04-25 07:09:54 Functions: 4 4 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2024 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <math.h>
      10             : #include <stddef.h>
      11             : #include <stdio.h>
      12             : #include <stdlib.h>
      13             : #include <string.h>
      14             : 
      15             : #include "common/grid_common.h"
      16             : #include "common/grid_constants.h"
      17             : #include "common/grid_library.h"
      18             : #include "grid_task_list.h"
      19             : 
      20             : /*******************************************************************************
      21             :  * \brief Allocates a task list which can be passed to grid_collocate_task_list.
      22             :  *        See grid_task_list.h for details.
      23             :  * \author Ole Schuett
      24             :  ******************************************************************************/
      25       13262 : void grid_create_task_list(
      26             :     const bool orthorhombic, const int ntasks, const int nlevels,
      27             :     const int natoms, const int nkinds, const int nblocks,
      28             :     const int block_offsets[nblocks], const double atom_positions[natoms][3],
      29             :     const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
      30             :     const int level_list[ntasks], const int iatom_list[ntasks],
      31             :     const int jatom_list[ntasks], const int iset_list[ntasks],
      32             :     const int jset_list[ntasks], const int ipgf_list[ntasks],
      33             :     const int jpgf_list[ntasks], const int border_mask_list[ntasks],
      34             :     const int block_num_list[ntasks], const double radius_list[ntasks],
      35             :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
      36             :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
      37             :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
      38             :     const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {
      39             : 
      40       13262 :   const grid_library_config config = grid_library_get_config();
      41             : 
      42       13262 :   grid_task_list *task_list = NULL;
      43             : 
      44       13262 :   if (*task_list_out == NULL) {
      45        7916 :     task_list = malloc(sizeof(grid_task_list));
      46        7916 :     memset(task_list, 0, sizeof(grid_task_list));
      47             : 
      48             :     // Resolve AUTO to a concrete backend.
      49        7916 :     if (config.backend == GRID_BACKEND_AUTO) {
      50             : 
      51             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
      52             :       task_list->backend = GRID_BACKEND_HIP;
      53             : #elif defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
      54             :       task_list->backend = GRID_BACKEND_GPU;
      55             : #else
      56        7896 :       task_list->backend = GRID_BACKEND_CPU;
      57             : #endif
      58             :     } else {
      59          20 :       task_list->backend = config.backend;
      60             :     }
      61             :   } else {
      62             :     // Reuse existing task list.
      63        5346 :     task_list = *task_list_out;
      64        5346 :     free(task_list->npts_local);
      65             :   }
      66             : 
      67             :   // Store npts_local for bounds checking and validation.
      68       13262 :   task_list->nlevels = nlevels;
      69       13262 :   size_t size = nlevels * 3 * sizeof(int);
      70       13262 :   task_list->npts_local = malloc(size);
      71       13262 :   memcpy(task_list->npts_local, npts_local, size);
      72             : 
      73             :   // Always create reference backend because it might be needed for validation.
      74       13262 :   grid_ref_create_task_list(
      75             :       orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      76             :       atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      77             :       jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
      78             :       block_num_list, radius_list, rab_list, npts_global, npts_local,
      79             :       shift_local, border_width, dh, dh_inv, &task_list->ref);
      80             : 
      81             :   // Create other backend, if selected.
      82       13262 :   switch (task_list->backend) {
      83             :   case GRID_BACKEND_REF:
      84             :     break; // was already created above
      85       13238 :   case GRID_BACKEND_CPU:
      86       13238 :     grid_cpu_create_task_list(
      87             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      88             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      89             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
      90             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
      91             :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
      92       13238 :     break;
      93          20 :   case GRID_BACKEND_DGEMM:
      94          20 :     grid_dgemm_create_task_list(
      95             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      96             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      97             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
      98             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
      99             :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
     100          20 :     break;
     101             : 
     102           0 :   case GRID_BACKEND_GPU:
     103             : #if (defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID))
     104             :     grid_gpu_create_task_list(
     105             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     106             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     107             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     108             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     109             :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->gpu);
     110             : #else
     111           0 :     fprintf(stderr,
     112             :             "Error: The GPU grid backend is not available. "
     113             :             "Please re-compile with -D__OFFLOAD_CUDA or -D__OFFLOAD_HIP");
     114           0 :     abort();
     115             : #endif
     116           0 :     break;
     117             : 
     118           0 :   case GRID_BACKEND_HIP:
     119             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     120             :     grid_hip_create_task_list(
     121             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     122             :         &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
     123             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     124             :         border_mask_list, block_num_list, radius_list, &rab_list[0][0],
     125             :         &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
     126             :         &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->hip);
     127             : #else
     128           0 :     fprintf(stderr, "Error: The HIP grid backend is not available. "
     129             :                     "Please re-compile with -D__OFFLOAD_HIP");
     130           0 :     abort();
     131             : #endif
     132           0 :     break;
     133             : 
     134           0 :   default:
     135           0 :     printf("Error: Unknown grid backend: %i.\n", config.backend);
     136           0 :     abort();
     137       13262 :     break;
     138             :   }
     139             : 
     140       13262 :   *task_list_out = task_list;
     141       13262 : }
     142             : 
     143             : /*******************************************************************************
     144             :  * \brief Deallocates given task list, basis_sets have to be freed separately.
     145             :  * \author Ole Schuett
     146             :  ******************************************************************************/
     147        7916 : void grid_free_task_list(grid_task_list *task_list) {
     148             : 
     149        7916 :   if (task_list->ref != NULL) {
     150        7916 :     grid_ref_free_task_list(task_list->ref);
     151        7916 :     task_list->ref = NULL;
     152             :   }
     153        7916 :   if (task_list->cpu != NULL) {
     154        7904 :     grid_cpu_free_task_list(task_list->cpu);
     155        7904 :     task_list->cpu = NULL;
     156             :   }
     157        7916 :   if (task_list->dgemm != NULL) {
     158           8 :     grid_dgemm_free_task_list(task_list->dgemm);
     159           8 :     task_list->dgemm = NULL;
     160             :   }
     161             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     162             :   if (task_list->gpu != NULL) {
     163             :     grid_gpu_free_task_list(task_list->gpu);
     164             :     task_list->gpu = NULL;
     165             :   }
     166             : #endif
     167             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     168             :   if (task_list->hip != NULL) {
     169             :     grid_hip_free_task_list(task_list->hip);
     170             :     task_list->hip = NULL;
     171             :   }
     172             : #endif
     173             : 
     174        7916 :   free(task_list->npts_local);
     175        7916 :   free(task_list);
     176        7916 : }
     177             : 
     178             : /*******************************************************************************
     179             :  * \brief Collocate all tasks of in given list onto given grids.
     180             :  *        See grid_task_list.h for details.
     181             :  * \author Ole Schuett
     182             :  ******************************************************************************/
     183      191371 : void grid_collocate_task_list(const grid_task_list *task_list,
     184             :                               const enum grid_func func, const int nlevels,
     185             :                               const int npts_local[nlevels][3],
     186             :                               const offload_buffer *pab_blocks,
     187             :                               offload_buffer *grids[nlevels]) {
     188             : 
     189             :   // Bounds check.
     190      191371 :   assert(task_list->nlevels == nlevels);
     191      951429 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     192      760058 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     193      760058 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     194      760058 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     195             :   }
     196             : 
     197      191371 :   switch (task_list->backend) {
     198          26 :   case GRID_BACKEND_REF:
     199          26 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     200             :                                  grids);
     201          26 :     break;
     202      191187 :   case GRID_BACKEND_CPU:
     203      191187 :     grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
     204             :                                  grids);
     205      191187 :     break;
     206         158 :   case GRID_BACKEND_DGEMM:
     207         158 :     grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
     208             :                                    grids);
     209         158 :     break;
     210             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     211             :   case GRID_BACKEND_GPU:
     212             :     grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
     213             :                                  grids);
     214             :     break;
     215             : #endif
     216             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     217             :   case GRID_BACKEND_HIP:
     218             :     grid_hip_collocate_task_list(task_list->hip, func, nlevels, pab_blocks,
     219             :                                  grids);
     220             :     break;
     221             : #endif
     222           0 :   default:
     223           0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     224           0 :     abort();
     225      191371 :     break;
     226             :   }
     227             : 
     228             :   // Perform validation if enabled.
     229      191371 :   if (grid_library_get_config().validate) {
     230             :     // Allocate space for reference results.
     231          26 :     offload_buffer *grids_ref[nlevels];
     232         130 :     for (int level = 0; level < nlevels; level++) {
     233         104 :       const int npts_local_total =
     234         104 :           npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
     235         104 :       grids_ref[level] = NULL;
     236         104 :       offload_create_buffer(npts_local_total, &grids_ref[level]);
     237             :     }
     238             : 
     239             :     // Call reference implementation.
     240          26 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     241             :                                  grids_ref);
     242             : 
     243             :     // Compare results.
     244          26 :     const double tolerance = 1e-12;
     245          26 :     double max_rel_diff = 0.0;
     246         130 :     for (int level = 0; level < nlevels; level++) {
     247        2374 :       for (int i = 0; i < npts_local[level][0]; i++) {
     248       81112 :         for (int j = 0; j < npts_local[level][1]; j++) {
     249     3619188 :           for (int k = 0; k < npts_local[level][2]; k++) {
     250     3540346 :             const int idx = k * npts_local[level][1] * npts_local[level][0] +
     251     3540346 :                             j * npts_local[level][0] + i;
     252     3540346 :             const double ref_value = grids_ref[level]->host_buffer[idx];
     253     3540346 :             const double test_value = grids[level]->host_buffer[idx];
     254     3540346 :             const double diff = fabs(test_value - ref_value);
     255     3540346 :             const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     256     3540346 :             max_rel_diff = fmax(max_rel_diff, rel_diff);
     257     3540346 :             if (rel_diff > tolerance) {
     258           0 :               fprintf(stderr, "Error: Validation failure in grid collocate\n");
     259           0 :               fprintf(stderr, "   diff:     %le\n", diff);
     260           0 :               fprintf(stderr, "   rel_diff: %le\n", rel_diff);
     261           0 :               fprintf(stderr, "   value:    %le\n", ref_value);
     262           0 :               fprintf(stderr, "   level:    %i\n", level);
     263           0 :               fprintf(stderr, "   ijk:      %i  %i  %i\n", i, j, k);
     264           0 :               abort();
     265             :             }
     266             :           }
     267             :         }
     268             :       }
     269         104 :       offload_free_buffer(grids_ref[level]);
     270         104 :       printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
     271             :     }
     272             :   }
     273      191371 : }
     274             : 
     275             : /*******************************************************************************
     276             :  * \brief Integrate all tasks of in given list from given grids.
     277             :  *        See grid_task_list.h for details.
     278             :  * \author Ole Schuett
     279             :  ******************************************************************************/
     280      178825 : void grid_integrate_task_list(
     281             :     const grid_task_list *task_list, const bool compute_tau, const int natoms,
     282             :     const int nlevels, const int npts_local[nlevels][3],
     283             :     const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels],
     284             :     offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3]) {
     285             : 
     286             :   // Bounds check.
     287      178825 :   assert(task_list->nlevels == nlevels);
     288      888331 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     289      709506 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     290      709506 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     291      709506 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     292             :   }
     293             : 
     294      178825 :   assert(forces == NULL || pab_blocks != NULL);
     295      178825 :   assert(virial == NULL || pab_blocks != NULL);
     296             : 
     297      178825 :   switch (task_list->backend) {
     298             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     299             :   case GRID_BACKEND_GPU:
     300             :     grid_gpu_integrate_task_list(task_list->gpu, compute_tau, natoms, nlevels,
     301             :                                  pab_blocks, grids, hab_blocks, forces, virial);
     302             :     break;
     303             : #endif
     304             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     305             :   case GRID_BACKEND_HIP:
     306             :     grid_hip_integrate_task_list(task_list->hip, compute_tau, nlevels,
     307             :                                  pab_blocks, grids, hab_blocks, &forces[0][0],
     308             :                                  &virial[0][0]);
     309             :     break;
     310             : #endif
     311         156 :   case GRID_BACKEND_DGEMM:
     312         156 :     grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
     313             :                                    nlevels, pab_blocks, grids, hab_blocks,
     314             :                                    forces, virial);
     315         156 :     break;
     316      178645 :   case GRID_BACKEND_CPU:
     317      178645 :     grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
     318             :                                  pab_blocks, grids, hab_blocks, forces, virial);
     319      178645 :     break;
     320          24 :   case GRID_BACKEND_REF:
     321          24 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     322             :                                  pab_blocks, grids, hab_blocks, forces, virial);
     323          24 :     break;
     324           0 :   default:
     325           0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     326           0 :     abort();
     327      178825 :     break;
     328             :   }
     329             : 
     330             :   // Perform validation if enabled.
     331      178825 :   if (grid_library_get_config().validate) {
     332             :     // Allocate space for reference results.
     333          24 :     const int hab_length = hab_blocks->size / sizeof(double);
     334          24 :     offload_buffer *hab_blocks_ref = NULL;
     335          24 :     offload_create_buffer(hab_length, &hab_blocks_ref);
     336          24 :     double forces_ref[natoms][3], virial_ref[3][3];
     337             : 
     338             :     // Call reference implementation.
     339          48 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     340             :                                  pab_blocks, grids, hab_blocks_ref,
     341             :                                  (forces != NULL) ? forces_ref : NULL,
     342             :                                  (virial != NULL) ? virial_ref : NULL);
     343             : 
     344             :     // Compare hab.
     345          24 :     const double hab_tolerance = 1e-12;
     346          24 :     double hab_max_rel_diff = 0.0;
     347        2025 :     for (int i = 0; i < hab_length; i++) {
     348        2001 :       const double ref_value = hab_blocks_ref->host_buffer[i];
     349        2001 :       const double test_value = hab_blocks->host_buffer[i];
     350        2001 :       const double diff = fabs(test_value - ref_value);
     351        2001 :       const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     352        2001 :       hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
     353        2001 :       if (rel_diff > hab_tolerance) {
     354           0 :         fprintf(stderr, "Error: Validation failure in grid integrate\n");
     355           0 :         fprintf(stderr, "   hab diff:     %le\n", diff);
     356           0 :         fprintf(stderr, "   hab rel_diff: %le\n", rel_diff);
     357           0 :         fprintf(stderr, "   hab value:    %le\n", ref_value);
     358           0 :         fprintf(stderr, "   hab i:        %i\n", i);
     359           0 :         abort();
     360             :       }
     361             :     }
     362             : 
     363             :     // Compare forces.
     364          24 :     const double forces_tolerance = 1e-8; // account for higher numeric noise
     365          24 :     double forces_max_rel_diff = 0.0;
     366          24 :     if (forces != NULL) {
     367          14 :       for (int iatom = 0; iatom < natoms; iatom++) {
     368          40 :         for (int idir = 0; idir < 3; idir++) {
     369          30 :           const double ref_value = forces_ref[iatom][idir];
     370          30 :           const double test_value = forces[iatom][idir];
     371          30 :           const double diff = fabs(test_value - ref_value);
     372          30 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     373          30 :           forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
     374          30 :           if (rel_diff > forces_tolerance) {
     375           0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     376           0 :             fprintf(stderr, "   forces diff:     %le\n", diff);
     377           0 :             fprintf(stderr, "   forces rel_diff: %le\n", rel_diff);
     378           0 :             fprintf(stderr, "   forces value:    %le\n", ref_value);
     379           0 :             fprintf(stderr, "   forces atom:     %i\n", iatom);
     380           0 :             fprintf(stderr, "   forces dir:      %i\n", idir);
     381           0 :             abort();
     382             :           }
     383             :         }
     384             :       }
     385             :     }
     386             : 
     387             :     // Compare virial.
     388          24 :     const double virial_tolerance = 1e-8; // account for higher numeric noise
     389          24 :     double virial_max_rel_diff = 0.0;
     390          24 :     if (virial != NULL) {
     391           0 :       for (int i = 0; i < 3; i++) {
     392           0 :         for (int j = 0; j < 3; j++) {
     393           0 :           const double ref_value = virial_ref[i][j];
     394           0 :           const double test_value = virial[i][j];
     395           0 :           const double diff = fabs(test_value - ref_value);
     396           0 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     397           0 :           virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
     398           0 :           if (rel_diff > virial_tolerance) {
     399           0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     400           0 :             fprintf(stderr, "   virial diff:     %le\n", diff);
     401           0 :             fprintf(stderr, "   virial rel_diff: %le\n", rel_diff);
     402           0 :             fprintf(stderr, "   virial value:    %le\n", ref_value);
     403           0 :             fprintf(stderr, "   virial ij:       %i  %i\n", i, j);
     404           0 :             abort();
     405             :           }
     406             :         }
     407             :       }
     408             :     }
     409             : 
     410          24 :     printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
     411             :            hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
     412          24 :     offload_free_buffer(hab_blocks_ref);
     413             :   }
     414      178825 : }
     415             : 
     416             : // EOF

Generated by: LCOV version 1.15