LCOV - code coverage report
Current view: top level - src/grid - grid_replay.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:85b8a9b) Lines: 88.4 % 302 267
Test Date: 2026-06-14 06:48:14 Functions: 100.0 % 9 9

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : 
       8              : #include <assert.h>
       9              : #include <fenv.h>
      10              : #include <limits.h>
      11              : #include <math.h>
      12              : #include <omp.h>
      13              : #include <stdarg.h>
      14              : #include <stdbool.h>
      15              : #include <stdio.h>
      16              : #include <stdlib.h>
      17              : #include <string.h>
      18              : 
      19              : #include "../offload/offload_buffer.h"
      20              : #include "common/grid_common.h"
      21              : #include "grid_replay.h"
      22              : 
      23              : #include "cpu/grid_cpu_collocate.h"
      24              : #include "cpu/grid_cpu_integrate.h"
      25              : #include "grid_task_list.h"
      26              : #include "grid_task_list_internal.h"
      27              : 
      28              : /*******************************************************************************
      29              :  * \brief Reads next line from given filehandle and handles errors.
      30              :  * \author Ole Schuett
      31              :  ******************************************************************************/
      32       655456 : static void read_next_line(char line[], int length, FILE *fp) {
      33      1310912 :   if (fgets(line, length, fp) == NULL) {
      34            0 :     fprintf(stderr, "Error: Could not read line.\n");
      35            0 :     abort();
      36              :   }
      37       655456 : }
      38              : 
      39              : /*******************************************************************************
      40              :  * \brief Shorthand for parsing a single integer value.
      41              :  * \author Ole Schuett
      42              :  ******************************************************************************/
      43         1248 : static int parse_int(const char key[], FILE *fp) {
      44         1248 :   int value;
      45         1248 :   char line[100], format[100];
      46         1248 :   read_next_line(line, sizeof(line), fp);
      47         1248 :   snprintf(format, sizeof(format), "%s %%i", key);
      48         1248 :   if (sscanf(line, format, &value) != 1) {
      49            0 :     assert(!"parse_int failed");
      50              :   }
      51         1248 :   return value;
      52              : }
      53              : 
      54              : /*******************************************************************************
      55              :  * \brief Shorthand for parsing a vector of three integer values.
      56              :  * \author Ole Schuett
      57              :  ******************************************************************************/
      58          416 : static void parse_int3(const char key[], FILE *fp, int vec[3]) {
      59          416 :   char line[100], format[100];
      60          416 :   read_next_line(line, sizeof(line), fp);
      61          416 :   snprintf(format, sizeof(format), "%s %%i %%i %%i", key);
      62          416 :   if (sscanf(line, format, &vec[0], &vec[1], &vec[2]) != 3) {
      63            0 :     assert(!"parse_int3 failed");
      64              :   }
      65          416 : }
      66              : 
      67              : /*******************************************************************************
      68              :  * \brief Shorthand for parsing a single double value.
      69              :  * \author Ole Schuett
      70              :  ******************************************************************************/
      71          416 : static double parse_double(const char key[], FILE *fp) {
      72          416 :   double value;
      73          416 :   char line[100], format[100];
      74          416 :   read_next_line(line, sizeof(line), fp);
      75          416 :   snprintf(format, sizeof(format), "%s %%lf", key);
      76          416 :   if (sscanf(line, format, &value) != 1) {
      77            0 :     assert(!"parse_double failed");
      78              :   }
      79          416 :   return value;
      80              : }
      81              : 
      82              : /*******************************************************************************
      83              :  * \brief Shorthand for parsing a vector of three double values.
      84              :  * \author Ole Schuett
      85              :  ******************************************************************************/
      86          416 : static void parse_double3(const char key[], FILE *fp, double vec[3]) {
      87          416 :   char line[100], format[100];
      88          416 :   read_next_line(line, sizeof(line), fp);
      89          416 :   snprintf(format, sizeof(format), "%s %%lf %%lf %%lf", key);
      90          416 :   if (sscanf(line, format, &vec[0], &vec[1], &vec[2]) != 3) {
      91            0 :     assert(!"parse_double3 failed");
      92              :   }
      93          416 : }
      94              : 
      95              : /*******************************************************************************
      96              :  * \brief Shorthand for parsing a 3x3 matrix of doubles.
      97              :  * \author Ole Schuett
      98              :  ******************************************************************************/
      99          312 : static void parse_double3x3(const char key[], FILE *fp, double mat[3][3]) {
     100          312 :   char line[100], format[100];
     101         1248 :   for (int i = 0; i < 3; i++) {
     102          936 :     read_next_line(line, sizeof(line), fp);
     103          936 :     snprintf(format, sizeof(format), "%s %i %%lf %%lf %%lf", key, i);
     104          936 :     if (sscanf(line, format, &mat[i][0], &mat[i][1], &mat[i][2]) != 3) {
     105            0 :       assert(!"parse_double3x3 failed");
     106              :     }
     107              :   }
     108          312 : }
     109              : 
     110              : /*******************************************************************************
     111              :  * \brief Creates mock basis set using the identity as decontraction matrix.
     112              :  * \author Ole Schuett
     113              :  ******************************************************************************/
     114          104 : static void create_dummy_basis_set(const int size, const int lmin,
     115              :                                    const int lmax, const double zet,
     116          104 :                                    grid_basis_set **basis_set) {
     117              : 
     118          104 :   double sphi_mutable[size][size];
     119         1788 :   for (int i = 0; i < size; i++) {
     120        47808 :     for (int j = 0; j < size; j++) {
     121        90564 :       sphi_mutable[i][j] = (i == j) ? 1.0 : 0.0; // identity matrix
     122              :     }
     123              :   }
     124          104 :   const double(*sphi)[size] = (const double(*)[size])sphi_mutable;
     125              : 
     126          104 :   const int npgf = size / ncoset(lmax);
     127          104 :   assert(size == npgf * ncoset(lmax));
     128              : 
     129          104 :   const int first_sgf[1] = {1};
     130              : 
     131          104 :   double zet_array_mutable[1][npgf];
     132          808 :   for (int i = 0; i < npgf; i++) {
     133          704 :     zet_array_mutable[0][i] = zet;
     134              :   }
     135          104 :   const double(*zet_array)[npgf] = (const double(*)[npgf])zet_array_mutable;
     136              : 
     137          104 :   grid_create_basis_set(/*nset=*/1,
     138              :                         /*nsgf=*/size,
     139              :                         /*maxco=*/size,
     140              :                         /*maxpgf=*/npgf,
     141              :                         /*lmin=*/&lmin,
     142              :                         /*lmax=*/&lmax,
     143              :                         /*npgf=*/&npgf,
     144              :                         /*nsgf_set=*/&size,
     145              :                         /*first_sgf=*/first_sgf,
     146              :                         /*sphi=*/sphi,
     147              :                         /*zet=*/zet_array, basis_set);
     148          104 : }
     149              : 
     150              : /*******************************************************************************
     151              :  * \brief Creates mock task list with one task per cycle.
     152              :  * \author Ole Schuett
     153              :  ******************************************************************************/
     154           52 : static void create_dummy_task_list(
     155              :     const bool orthorhombic, const int border_mask, const double ra[3],
     156              :     const double rab[3], const double radius, const grid_basis_set *basis_set_a,
     157              :     const grid_basis_set *basis_set_b, const int o1, const int o2,
     158              :     const int la_max, const int lb_max, const int cycles,
     159              :     const int cycles_per_block, const int npts_global[][3],
     160              :     const int npts_local[][3], const int shift_local[][3],
     161              :     const int border_width[][3], const double dh[][3][3],
     162           52 :     const double dh_inv[][3][3], grid_task_list **task_list) {
     163              : 
     164           52 :   const int ntasks = cycles;
     165           52 :   const int nlevels = 1;
     166           52 :   const int natoms = 2;
     167           52 :   const int nkinds = 2;
     168           52 :   int nblocks = cycles / cycles_per_block + 1;
     169              : 
     170              :   /* we can not have more blocks than the number of tasks */
     171           52 :   if (cycles == 1) {
     172           52 :     nblocks = 1;
     173              :   }
     174              : 
     175           52 :   int block_offsets[nblocks];
     176           52 :   memset(block_offsets, 0, nblocks * sizeof(int)); // all point to same data
     177           52 :   const double atom_positions[2][3] = {
     178           52 :       {ra[0], ra[1], ra[2]}, {rab[0] + ra[0], rab[1] + ra[1], rab[2] + ra[2]}};
     179           52 :   const int atom_kinds[2] = {1, 2};
     180           52 :   const grid_basis_set *basis_sets[2] = {basis_set_a, basis_set_b};
     181           52 :   const int ipgf = o1 / ncoset(la_max) + 1;
     182           52 :   const int jpgf = o2 / ncoset(lb_max) + 1;
     183           52 :   assert(o1 == (ipgf - 1) * ncoset(la_max));
     184           52 :   assert(o2 == (jpgf - 1) * ncoset(lb_max));
     185              : 
     186           52 :   int level_list[ntasks], iatom_list[ntasks], jatom_list[ntasks];
     187           52 :   int iset_list[ntasks], jset_list[ntasks], ipgf_list[ntasks],
     188           52 :       jpgf_list[ntasks];
     189           52 :   int border_mask_list[ntasks], block_num_list[ntasks];
     190           52 :   double radius_list[ntasks], rab_list_mutable[ntasks][3];
     191          104 :   for (int i = 0; i < cycles; i++) {
     192           52 :     level_list[i] = 1;
     193           52 :     iatom_list[i] = 1;
     194           52 :     jatom_list[i] = 2;
     195           52 :     iset_list[i] = 1;
     196           52 :     jset_list[i] = 1;
     197           52 :     ipgf_list[i] = ipgf;
     198           52 :     jpgf_list[i] = jpgf;
     199           52 :     border_mask_list[i] = border_mask;
     200           52 :     block_num_list[i] = i / cycles_per_block + 1;
     201           52 :     radius_list[i] = radius;
     202           52 :     rab_list_mutable[i][0] = rab[0];
     203           52 :     rab_list_mutable[i][1] = rab[1];
     204           52 :     rab_list_mutable[i][2] = rab[2];
     205              :   }
     206           52 :   const double(*rab_list)[3] = (const double(*)[3])rab_list_mutable;
     207              : 
     208           52 :   grid_create_task_list(
     209              :       orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     210              :       atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     211              :       jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
     212              :       block_num_list, radius_list, rab_list, npts_global, npts_local,
     213              :       shift_local, border_width, dh, dh_inv, task_list);
     214           52 : }
     215              : 
     216              : /*******************************************************************************
     217              :  * \brief Reads a .task file, collocates/integrates it, and compares results.
     218              :  *        See grid_replay.h for details.
     219              :  * \author Ole Schuett
     220              :  ******************************************************************************/
     221          104 : bool grid_replay(const char *filename, const int cycles, const bool collocate,
     222              :                  const bool batch, const int cycles_per_block,
     223              :                  const double tolerance) {
     224              : 
     225          104 :   if (cycles < 1) {
     226            0 :     fprintf(stderr, "Error: Cycles have to be greater than zero.\n");
     227            0 :     exit(1);
     228              :   }
     229              : 
     230          104 :   if (cycles_per_block < 1 || cycles_per_block > cycles) {
     231            0 :     fprintf(stderr,
     232              :             "Error: Cycles per block has to be between 1 and cycles.\n");
     233            0 :     exit(1);
     234              :   }
     235              : 
     236          104 :   FILE *fp = fopen(filename, "r");
     237          104 :   if (fp == NULL) {
     238            0 :     fprintf(stderr, "Could not open task file: %s\n", filename);
     239            0 :     exit(1);
     240              :   }
     241              : 
     242          104 :   char header_line[100];
     243          104 :   read_next_line(header_line, sizeof(header_line), fp);
     244          104 :   if (strcmp(header_line, "#Grid task v10\n") != 0) {
     245            0 :     fprintf(stderr, "Error: Wrong file header.\n");
     246            0 :     abort();
     247              :   }
     248              : 
     249          104 :   const bool orthorhombic = parse_int("orthorhombic", fp);
     250          104 :   const int border_mask = parse_int("border_mask", fp);
     251          104 :   const enum grid_func func = (enum grid_func)parse_int("func", fp);
     252          104 :   const bool compute_tau = (func == GRID_FUNC_DADB);
     253          104 :   const int la_max = parse_int("la_max", fp);
     254          104 :   const int la_min = parse_int("la_min", fp);
     255          104 :   const int lb_max = parse_int("lb_max", fp);
     256          104 :   const int lb_min = parse_int("lb_min", fp);
     257          104 :   const double zeta = parse_double("zeta", fp);
     258          104 :   const double zetb = parse_double("zetb", fp);
     259          104 :   const double rscale = parse_double("rscale", fp);
     260              : 
     261          104 :   double dh_mutable[3][3], dh_inv_mutable[3][3], ra[3], rab[3];
     262          104 :   parse_double3x3("dh", fp, dh_mutable);
     263          104 :   parse_double3x3("dh_inv", fp, dh_inv_mutable);
     264          104 :   parse_double3("ra", fp, ra);
     265          104 :   parse_double3("rab", fp, rab);
     266          104 :   const double(*dh)[3] = (const double(*)[3])dh_mutable;
     267          104 :   const double(*dh_inv)[3] = (const double(*)[3])dh_inv_mutable;
     268              : 
     269          104 :   int npts_global[3], npts_local[3], shift_local[3], border_width[3];
     270          104 :   parse_int3("npts_global", fp, npts_global);
     271          104 :   parse_int3("npts_local", fp, npts_local);
     272          104 :   parse_int3("shift_local", fp, shift_local);
     273          104 :   parse_int3("border_width", fp, border_width);
     274              : 
     275          104 :   const double radius = parse_double("radius", fp);
     276          104 :   const int o1 = parse_int("o1", fp);
     277          104 :   const int o2 = parse_int("o2", fp);
     278          104 :   const int n1 = parse_int("n1", fp);
     279          104 :   const int n2 = parse_int("n2", fp);
     280          104 :   const size_t n12 = (size_t)n1 * (size_t)n2;
     281              : 
     282          104 :   double *pab_storage = malloc(n12 * sizeof(double));
     283          104 :   if (pab_storage == NULL) {
     284            0 :     fprintf(stderr, "Error: Could not allocate pab buffer.\n");
     285            0 :     abort();
     286              :   }
     287         1688 :   double(*pab)[n1] = (double(*)[n1])pab_storage;
     288              :   char line[100], format[100];
     289         1688 :   for (int i = 0; i < n2; i++) {
     290        46120 :     for (int j = 0; j < n1; j++) {
     291        44536 :       read_next_line(line, sizeof(line), fp);
     292        44536 :       snprintf(format, sizeof(format), "pab %i %i %%lf", i, j);
     293        44536 :       if (sscanf(line, format, &pab[i][j]) != 1) {
     294            0 :         assert(!"parse_pab failed");
     295              :       }
     296              :     }
     297              :   }
     298              : 
     299          104 :   const int npts_local_total = npts_local[0] * npts_local[1] * npts_local[2];
     300          104 :   offload_buffer *grid_ref = NULL;
     301          104 :   offload_create_buffer(npts_local_total, &grid_ref);
     302          104 :   memset(grid_ref->host_buffer, 0, npts_local_total * sizeof(double));
     303              : 
     304          104 :   const int ngrid_nonzero = parse_int("ngrid_nonzero", fp);
     305       578328 :   for (int n = 0; n < ngrid_nonzero; n++) {
     306       578224 :     int i, j, k;
     307       578224 :     double value;
     308       578224 :     read_next_line(line, sizeof(line), fp);
     309       578224 :     if (sscanf(line, "grid %i %i %i %le", &i, &j, &k, &value) != 4) {
     310            0 :       assert(!"parse_grid failed");
     311              :     }
     312       578224 :     grid_ref->host_buffer[k * npts_local[1] * npts_local[0] +
     313       578224 :                           j * npts_local[0] + i] = value;
     314              :   }
     315              : 
     316          104 :   double *hab_ref_storage = calloc(n12, sizeof(double));
     317          104 :   if (hab_ref_storage == NULL) {
     318            0 :     fprintf(stderr, "Error: Could not allocate hab_ref buffer.\n");
     319            0 :     abort();
     320              :   }
     321          896 :   double(*hab_ref)[n1] = (double(*)[n1])hab_ref_storage;
     322          896 :   for (int i = o2; i < ncoset(lb_max) + o2; i++) {
     323        29848 :     for (int j = o1; j < ncoset(la_max) + o1; j++) {
     324        29056 :       read_next_line(line, sizeof(line), fp);
     325        29056 :       snprintf(format, sizeof(format), "hab %i %i %%lf", i, j);
     326        29056 :       if (sscanf(line, format, &hab_ref[i][j]) != 1) {
     327            0 :         assert(!"parse_hab failed");
     328              :       }
     329              :     }
     330              :   }
     331              : 
     332          104 :   double forces_ref[2][3];
     333          104 :   parse_double3("force_a", fp, forces_ref[0]);
     334          104 :   parse_double3("force_b", fp, forces_ref[1]);
     335              : 
     336          104 :   double virial_ref[3][3];
     337          104 :   parse_double3x3("virial", fp, virial_ref);
     338              : 
     339          104 :   char footer_line[100];
     340          104 :   read_next_line(footer_line, sizeof(footer_line), fp);
     341          104 :   if (strcmp(footer_line, "#THE_END\n") != 0) {
     342            0 :     fprintf(stderr, "Error: Wrong footer line.\n");
     343            0 :     abort();
     344              :   }
     345              : 
     346          104 :   if (fclose(fp) != 0) {
     347            0 :     fprintf(stderr, "Could not close task file: %s\n", filename);
     348            0 :     abort();
     349              :   }
     350              : 
     351          104 :   offload_buffer *grid_test = NULL;
     352          104 :   offload_create_buffer(npts_local_total, &grid_test);
     353          104 :   double *hab_test_storage = calloc(n12, sizeof(double));
     354          104 :   if (hab_test_storage == NULL) {
     355            0 :     fprintf(stderr, "Error: Could not allocate hab_test buffer.\n");
     356            0 :     abort();
     357              :   }
     358          104 :   double(*hab_test)[n1] = (double(*)[n1])hab_test_storage;
     359          104 :   double forces_test[2][3];
     360          104 :   double virial_test[3][3];
     361          104 :   double start_time, end_time;
     362              : 
     363          104 :   if (batch) {
     364           52 :     grid_basis_set *basisa = NULL, *basisb = NULL;
     365           52 :     create_dummy_basis_set(n1, la_min, la_max, zeta, &basisa);
     366           52 :     create_dummy_basis_set(n2, lb_min, lb_max, zetb, &basisb);
     367           52 :     grid_task_list *task_list = NULL;
     368           52 :     create_dummy_task_list(
     369              :         orthorhombic, border_mask, ra, rab, radius, basisa, basisb, o1, o2,
     370              :         la_max, lb_max, cycles, cycles_per_block, (const int(*)[3])npts_global,
     371              :         (const int(*)[3])npts_local, (const int(*)[3])shift_local,
     372              :         (const int(*)[3])border_width, (const double(*)[3][3])dh,
     373              :         (const double(*)[3][3])dh_inv, &task_list);
     374           52 :     offload_buffer *pab_blocks = NULL, *hab_blocks = NULL;
     375           52 :     offload_create_buffer(n1 * n2, &pab_blocks);
     376           52 :     offload_create_buffer(n1 * n2, &hab_blocks);
     377           52 :     const double f = (collocate) ? rscale : 1.0;
     378          944 :     for (int i = 0; i < n1; i++) {
     379        23160 :       for (int j = 0; j < n2; j++) {
     380        22268 :         pab_blocks->host_buffer[j * n1 + i] = 0.5 * f * pab[j][i];
     381              :       }
     382              :     }
     383           52 :     start_time = omp_get_wtime();
     384           52 :     const int nlevels = 1;
     385           52 :     const int natoms = 2;
     386           52 :     if (collocate) {
     387              :       // collocate
     388           26 :       offload_buffer *grids[1] = {grid_test};
     389           26 :       grid_collocate_task_list(task_list, func, nlevels,
     390              :                                (const int(*)[3])npts_local, pab_blocks, grids);
     391              :     } else {
     392              :       // integrate
     393           26 :       const offload_buffer *grids[1] = {grid_ref};
     394           26 :       grid_integrate_task_list(task_list, compute_tau, natoms, nlevels,
     395              :                                (const int(*)[3])npts_local, pab_blocks, grids,
     396              :                                hab_blocks, forces_test, virial_test);
     397          422 :       for (int i = 0; i < n2; i++) {
     398        11530 :         for (int j = 0; j < n1; j++) {
     399        11134 :           hab_test[i][j] = hab_blocks->host_buffer[i * n1 + j];
     400              :         }
     401              :       }
     402              :     }
     403           52 :     end_time = omp_get_wtime();
     404           52 :     grid_free_basis_set(basisa);
     405           52 :     grid_free_basis_set(basisb);
     406           52 :     grid_free_task_list(task_list);
     407           52 :     offload_free_buffer(pab_blocks);
     408           52 :     offload_free_buffer(hab_blocks);
     409              :   } else {
     410           52 :     start_time = omp_get_wtime();
     411           52 :     if (collocate) {
     412              :       // collocate
     413           26 :       memset(grid_test->host_buffer, 0, npts_local_total * sizeof(double));
     414           52 :       for (int i = 0; i < cycles; i++) {
     415           26 :         grid_cpu_collocate_pgf_product(
     416              :             orthorhombic, border_mask, func, la_max, la_min, lb_max, lb_min,
     417              :             zeta, zetb, rscale, dh, dh_inv, ra, rab, npts_global, npts_local,
     418              :             shift_local, border_width, radius, o1, o2, n1, n2, pab,
     419           26 :             grid_test->host_buffer);
     420              :       }
     421              :     } else {
     422              :       // integrate
     423           26 :       memset(hab_test_storage, 0, n12 * sizeof(double));
     424           26 :       memset(forces_test, 0, 2 * 3 * sizeof(double));
     425           26 :       double virials_test[2][3][3] = {0};
     426           52 :       for (int i = 0; i < cycles; i++) {
     427           26 :         grid_cpu_integrate_pgf_product(
     428              :             orthorhombic, compute_tau, border_mask, la_max, la_min, lb_max,
     429              :             lb_min, zeta, zetb, dh, dh_inv, ra, rab, npts_global, npts_local,
     430              :             shift_local, border_width, radius, o1, o2, n1, n2,
     431           26 :             grid_ref->host_buffer, hab_test, pab, forces_test, virials_test,
     432              :             NULL, NULL, NULL);
     433              :       }
     434          104 :       for (int i = 0; i < 3; i++) {
     435          312 :         for (int j = 0; j < 3; j++) {
     436          234 :           virial_test[i][j] = virials_test[0][i][j] + virials_test[1][i][j];
     437              :         }
     438              :       }
     439              :     }
     440           52 :     end_time = omp_get_wtime();
     441              :   }
     442              : 
     443          104 :   double max_value = 0.0;
     444          104 :   double max_rel_diff = 0.0;
     445          104 :   const double derivatives_precision = 1e-4; // account for higher numeric noise
     446          104 :   if (collocate) {
     447              :     // collocate
     448              :     // compare grid
     449      7151464 :     for (int i = 0; i < npts_local_total; i++) {
     450      7151412 :       const double ref_value = cycles * grid_ref->host_buffer[i];
     451      7151412 :       const double test_value = grid_test->host_buffer[i];
     452      7151412 :       const double diff = fabs(test_value - ref_value);
     453      7151412 :       const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     454      7151412 :       max_rel_diff = fmax(max_rel_diff, rel_diff);
     455      7151412 :       max_value = fmax(max_value, fabs(test_value));
     456              :     }
     457              :   } else {
     458              :     // integrate
     459              :     // compare hab
     460          844 :     for (int i = 0; i < n2; i++) {
     461        23060 :       for (int j = 0; j < n1; j++) {
     462        22268 :         const double ref_value = cycles * hab_ref[i][j];
     463        22268 :         const double test_value = hab_test[i][j];
     464        22268 :         const double diff = fabs(test_value - ref_value);
     465        22268 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     466        22268 :         max_rel_diff = fmax(max_rel_diff, rel_diff);
     467        22268 :         max_value = fmax(max_value, fabs(test_value));
     468        22268 :         if (rel_diff > tolerance) {
     469            0 :           printf("hab[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n", i,
     470              :                  j, ref_value, test_value, diff, rel_diff);
     471              :         }
     472              :       }
     473              :     }
     474              :     // compare forces
     475          156 :     for (int i = 0; i < 2; i++) {
     476          416 :       for (int j = 0; j < 3; j++) {
     477          312 :         const double ref_value = cycles * forces_ref[i][j];
     478          312 :         const double test_value = forces_test[i][j];
     479          312 :         const double diff = fabs(test_value - ref_value);
     480          312 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     481          312 :         max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
     482          312 :         if (rel_diff * derivatives_precision > tolerance) {
     483            0 :           printf("forces[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
     484              :                  i, j, ref_value, test_value, diff, rel_diff);
     485              :         }
     486              :       }
     487              :     }
     488              :     // compare virial
     489          208 :     for (int i = 0; i < 3; i++) {
     490          624 :       for (int j = 0; j < 3; j++) {
     491          468 :         const double ref_value = cycles * virial_ref[i][j];
     492          468 :         const double test_value = virial_test[i][j];
     493          468 :         const double diff = fabs(test_value - ref_value);
     494          468 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     495          468 :         max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
     496          468 :         if (rel_diff * derivatives_precision > tolerance) {
     497            0 :           printf("virial[ %i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
     498              :                  i, j, ref_value, test_value, diff, rel_diff);
     499              :         }
     500              :       }
     501              :     }
     502              :   }
     503          104 :   printf("Task: %-55s   %9s %-7s   Cycles: %e   Max value: %le   "
     504              :          "Max rel diff: %le   Time: %le sec\n",
     505              :          filename, collocate ? "Collocate" : "Integrate",
     506          104 :          batch ? "Batched" : "PGF-CPU", (float)cycles, max_value, max_rel_diff,
     507              :          end_time - start_time);
     508              : 
     509          104 :   offload_free_buffer(grid_ref);
     510          104 :   offload_free_buffer(grid_test);
     511          104 :   free(pab_storage);
     512          104 :   free(hab_ref_storage);
     513          104 :   free(hab_test_storage);
     514              : 
     515              :   // Check floating point exceptions.
     516          104 :   if (fetestexcept(FE_DIVBYZERO) != 0) {
     517            0 :     fprintf(stderr, "Error: Floating point exception FE_DIVBYZERO.\n");
     518            0 :     exit(1);
     519              :   }
     520          104 :   if (fetestexcept(FE_OVERFLOW) != 0) {
     521            0 :     fprintf(stderr, "Error: Floating point exception FE_OVERFLOW.\n");
     522            0 :     exit(1);
     523              :   }
     524              : 
     525          104 :   return max_rel_diff < tolerance;
     526              : }
     527              : 
     528              : // EOF
        

Generated by: LCOV version 2.0-1