LCOV - code coverage report
Current view: top level - src/grid - grid_replay.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 90.0 % 290 261
Test Date: 2025-12-04 06:27:48 Functions: 100.0 % 9 9

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : 
       8              : #include <assert.h>
       9              : #include <fenv.h>
      10              : #include <limits.h>
      11              : #include <math.h>
      12              : #include <omp.h>
      13              : #include <stdarg.h>
      14              : #include <stdbool.h>
      15              : #include <stdio.h>
      16              : #include <stdlib.h>
      17              : #include <string.h>
      18              : 
      19              : #include "../offload/offload_buffer.h"
      20              : #include "common/grid_common.h"
      21              : #include "grid_replay.h"
      22              : 
      23              : #include "cpu/grid_cpu_collocate.h"
      24              : #include "cpu/grid_cpu_integrate.h"
      25              : #include "grid_task_list.h"
      26              : 
      27              : /*******************************************************************************
      28              :  * \brief Reads next line from given filehandle and handles errors.
      29              :  * \author Ole Schuett
      30              :  ******************************************************************************/
      31       655456 : static void read_next_line(char line[], int length, FILE *fp) {
      32      1310912 :   if (fgets(line, length, fp) == NULL) {
      33            0 :     fprintf(stderr, "Error: Could not read line.\n");
      34            0 :     abort();
      35              :   }
      36       655456 : }
      37              : 
      38              : /*******************************************************************************
      39              :  * \brief Shorthand for parsing a single integer value.
      40              :  * \author Ole Schuett
      41              :  ******************************************************************************/
      42         1248 : static int parse_int(const char key[], FILE *fp) {
      43         1248 :   int value;
      44         1248 :   char line[100], format[100];
      45         1248 :   read_next_line(line, sizeof(line), fp);
      46         1248 :   snprintf(format, sizeof(format), "%s %%i", key);
      47         1248 :   if (sscanf(line, format, &value) != 1) {
      48            0 :     assert(!"parse_int failed");
      49              :   }
      50         1248 :   return value;
      51              : }
      52              : 
      53              : /*******************************************************************************
      54              :  * \brief Shorthand for parsing a vector of three integer values.
      55              :  * \author Ole Schuett
      56              :  ******************************************************************************/
      57          416 : static void parse_int3(const char key[], FILE *fp, int vec[3]) {
      58          416 :   char line[100], format[100];
      59          416 :   read_next_line(line, sizeof(line), fp);
      60          416 :   snprintf(format, sizeof(format), "%s %%i %%i %%i", key);
      61          416 :   if (sscanf(line, format, &vec[0], &vec[1], &vec[2]) != 3) {
      62            0 :     assert(!"parse_int3 failed");
      63              :   }
      64          416 : }
      65              : 
      66              : /*******************************************************************************
      67              :  * \brief Shorthand for parsing a single double value.
      68              :  * \author Ole Schuett
      69              :  ******************************************************************************/
      70          416 : static double parse_double(const char key[], FILE *fp) {
      71          416 :   double value;
      72          416 :   char line[100], format[100];
      73          416 :   read_next_line(line, sizeof(line), fp);
      74          416 :   snprintf(format, sizeof(format), "%s %%lf", key);
      75          416 :   if (sscanf(line, format, &value) != 1) {
      76            0 :     assert(!"parse_double failed");
      77              :   }
      78          416 :   return value;
      79              : }
      80              : 
      81              : /*******************************************************************************
      82              :  * \brief Shorthand for parsing a vector of three double values.
      83              :  * \author Ole Schuett
      84              :  ******************************************************************************/
      85          416 : static void parse_double3(const char key[], FILE *fp, double vec[3]) {
      86          416 :   char line[100], format[100];
      87          416 :   read_next_line(line, sizeof(line), fp);
      88          416 :   snprintf(format, sizeof(format), "%s %%lf %%lf %%lf", key);
      89          416 :   if (sscanf(line, format, &vec[0], &vec[1], &vec[2]) != 3) {
      90            0 :     assert(!"parse_double3 failed");
      91              :   }
      92          416 : }
      93              : 
      94              : /*******************************************************************************
      95              :  * \brief Shorthand for parsing a 3x3 matrix of doubles.
      96              :  * \author Ole Schuett
      97              :  ******************************************************************************/
      98          312 : static void parse_double3x3(const char key[], FILE *fp, double mat[3][3]) {
      99          312 :   char line[100], format[100];
     100         1248 :   for (int i = 0; i < 3; i++) {
     101          936 :     read_next_line(line, sizeof(line), fp);
     102          936 :     snprintf(format, sizeof(format), "%s %i %%lf %%lf %%lf", key, i);
     103          936 :     if (sscanf(line, format, &mat[i][0], &mat[i][1], &mat[i][2]) != 3) {
     104            0 :       assert(!"parse_double3x3 failed");
     105              :     }
     106              :   }
     107          312 : }
     108              : 
     109              : /*******************************************************************************
     110              :  * \brief Creates mock basis set using the identity as decontraction matrix.
     111              :  * \author Ole Schuett
     112              :  ******************************************************************************/
     113          104 : static void create_dummy_basis_set(const int size, const int lmin,
     114              :                                    const int lmax, const double zet,
     115          104 :                                    grid_basis_set **basis_set) {
     116              : 
     117          104 :   double sphi_mutable[size][size];
     118         1788 :   for (int i = 0; i < size; i++) {
     119        47808 :     for (int j = 0; j < size; j++) {
     120        90564 :       sphi_mutable[i][j] = (i == j) ? 1.0 : 0.0; // identity matrix
     121              :     }
     122              :   }
     123          104 :   const double(*sphi)[size] = (const double(*)[size])sphi_mutable;
     124              : 
     125          104 :   const int npgf = size / ncoset(lmax);
     126          104 :   assert(size == npgf * ncoset(lmax));
     127              : 
     128          104 :   const int first_sgf[1] = {1};
     129              : 
     130          104 :   double zet_array_mutable[1][npgf];
     131          808 :   for (int i = 0; i < npgf; i++) {
     132          704 :     zet_array_mutable[0][i] = zet;
     133              :   }
     134          104 :   const double(*zet_array)[npgf] = (const double(*)[npgf])zet_array_mutable;
     135              : 
     136          104 :   grid_create_basis_set(/*nset=*/1,
     137              :                         /*nsgf=*/size,
     138              :                         /*maxco=*/size,
     139              :                         /*maxpgf=*/npgf,
     140              :                         /*lmin=*/&lmin,
     141              :                         /*lmax=*/&lmax,
     142              :                         /*npgf=*/&npgf,
     143              :                         /*nsgf_set=*/&size,
     144              :                         /*first_sgf=*/first_sgf,
     145              :                         /*sphi=*/sphi,
     146              :                         /*zet=*/zet_array, basis_set);
     147          104 : }
     148              : 
     149              : /*******************************************************************************
     150              :  * \brief Creates mock task list with one task per cycle.
     151              :  * \author Ole Schuett
     152              :  ******************************************************************************/
     153           52 : static void create_dummy_task_list(
     154              :     const bool orthorhombic, const int border_mask, const double ra[3],
     155              :     const double rab[3], const double radius, const grid_basis_set *basis_set_a,
     156              :     const grid_basis_set *basis_set_b, const int o1, const int o2,
     157              :     const int la_max, const int lb_max, const int cycles,
     158              :     const int cycles_per_block, const int npts_global[][3],
     159              :     const int npts_local[][3], const int shift_local[][3],
     160              :     const int border_width[][3], const double dh[][3][3],
     161           52 :     const double dh_inv[][3][3], grid_task_list **task_list) {
     162              : 
     163           52 :   const int ntasks = cycles;
     164           52 :   const int nlevels = 1;
     165           52 :   const int natoms = 2;
     166           52 :   const int nkinds = 2;
     167           52 :   int nblocks = cycles / cycles_per_block + 1;
     168              : 
     169              :   /* we can not have more blocks than the number of tasks */
     170           52 :   if (cycles == 1) {
     171           52 :     nblocks = 1;
     172              :   }
     173              : 
     174           52 :   int block_offsets[nblocks];
     175           52 :   memset(block_offsets, 0, nblocks * sizeof(int)); // all point to same data
     176           52 :   const double atom_positions[2][3] = {
     177           52 :       {ra[0], ra[1], ra[2]}, {rab[0] + ra[0], rab[1] + ra[1], rab[2] + ra[2]}};
     178           52 :   const int atom_kinds[2] = {1, 2};
     179           52 :   const grid_basis_set *basis_sets[2] = {basis_set_a, basis_set_b};
     180           52 :   const int ipgf = o1 / ncoset(la_max) + 1;
     181           52 :   const int jpgf = o2 / ncoset(lb_max) + 1;
     182           52 :   assert(o1 == (ipgf - 1) * ncoset(la_max));
     183           52 :   assert(o2 == (jpgf - 1) * ncoset(lb_max));
     184              : 
     185           52 :   int level_list[ntasks], iatom_list[ntasks], jatom_list[ntasks];
     186           52 :   int iset_list[ntasks], jset_list[ntasks], ipgf_list[ntasks],
     187           52 :       jpgf_list[ntasks];
     188           52 :   int border_mask_list[ntasks], block_num_list[ntasks];
     189           52 :   double radius_list[ntasks], rab_list_mutable[ntasks][3];
     190          104 :   for (int i = 0; i < cycles; i++) {
     191           52 :     level_list[i] = 1;
     192           52 :     iatom_list[i] = 1;
     193           52 :     jatom_list[i] = 2;
     194           52 :     iset_list[i] = 1;
     195           52 :     jset_list[i] = 1;
     196           52 :     ipgf_list[i] = ipgf;
     197           52 :     jpgf_list[i] = jpgf;
     198           52 :     border_mask_list[i] = border_mask;
     199           52 :     block_num_list[i] = i / cycles_per_block + 1;
     200           52 :     radius_list[i] = radius;
     201           52 :     rab_list_mutable[i][0] = rab[0];
     202           52 :     rab_list_mutable[i][1] = rab[1];
     203           52 :     rab_list_mutable[i][2] = rab[2];
     204              :   }
     205           52 :   const double(*rab_list)[3] = (const double(*)[3])rab_list_mutable;
     206              : 
     207           52 :   grid_create_task_list(
     208              :       orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     209              :       atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     210              :       jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
     211              :       block_num_list, radius_list, rab_list, npts_global, npts_local,
     212              :       shift_local, border_width, dh, dh_inv, task_list);
     213           52 : }
     214              : 
     215              : /*******************************************************************************
     216              :  * \brief Reads a .task file, collocates/integrates it, and compares results.
     217              :  *        See grid_replay.h for details.
     218              :  * \author Ole Schuett
     219              :  ******************************************************************************/
     220          104 : bool grid_replay(const char *filename, const int cycles, const bool collocate,
     221              :                  const bool batch, const int cycles_per_block,
     222          104 :                  const double tolerance) {
     223              : 
     224          104 :   if (cycles < 1) {
     225            0 :     fprintf(stderr, "Error: Cycles have to be greater than zero.\n");
     226            0 :     exit(1);
     227              :   }
     228              : 
     229          104 :   if (cycles_per_block < 1 || cycles_per_block > cycles) {
     230            0 :     fprintf(stderr,
     231              :             "Error: Cycles per block has to be between 1 and cycles.\n");
     232            0 :     exit(1);
     233              :   }
     234              : 
     235          104 :   FILE *fp = fopen(filename, "r");
     236          104 :   if (fp == NULL) {
     237            0 :     fprintf(stderr, "Could not open task file: %s\n", filename);
     238            0 :     exit(1);
     239              :   }
     240              : 
     241          104 :   char header_line[100];
     242          104 :   read_next_line(header_line, sizeof(header_line), fp);
     243          104 :   if (strcmp(header_line, "#Grid task v10\n") != 0) {
     244            0 :     fprintf(stderr, "Error: Wrong file header.\n");
     245            0 :     abort();
     246              :   }
     247              : 
     248          104 :   const bool orthorhombic = parse_int("orthorhombic", fp);
     249          104 :   const int border_mask = parse_int("border_mask", fp);
     250          104 :   const enum grid_func func = (enum grid_func)parse_int("func", fp);
     251          104 :   const bool compute_tau = (func == GRID_FUNC_DADB);
     252          104 :   const int la_max = parse_int("la_max", fp);
     253          104 :   const int la_min = parse_int("la_min", fp);
     254          104 :   const int lb_max = parse_int("lb_max", fp);
     255          104 :   const int lb_min = parse_int("lb_min", fp);
     256          104 :   const double zeta = parse_double("zeta", fp);
     257          104 :   const double zetb = parse_double("zetb", fp);
     258          104 :   const double rscale = parse_double("rscale", fp);
     259              : 
     260          104 :   double dh_mutable[3][3], dh_inv_mutable[3][3], ra[3], rab[3];
     261          104 :   parse_double3x3("dh", fp, dh_mutable);
     262          104 :   parse_double3x3("dh_inv", fp, dh_inv_mutable);
     263          104 :   parse_double3("ra", fp, ra);
     264          104 :   parse_double3("rab", fp, rab);
     265          104 :   const double(*dh)[3] = (const double(*)[3])dh_mutable;
     266          104 :   const double(*dh_inv)[3] = (const double(*)[3])dh_inv_mutable;
     267              : 
     268          104 :   int npts_global[3], npts_local[3], shift_local[3], border_width[3];
     269          104 :   parse_int3("npts_global", fp, npts_global);
     270          104 :   parse_int3("npts_local", fp, npts_local);
     271          104 :   parse_int3("shift_local", fp, shift_local);
     272          104 :   parse_int3("border_width", fp, border_width);
     273              : 
     274          104 :   const double radius = parse_double("radius", fp);
     275          104 :   const int o1 = parse_int("o1", fp);
     276          104 :   const int o2 = parse_int("o2", fp);
     277          104 :   const int n1 = parse_int("n1", fp);
     278          104 :   const int n2 = parse_int("n2", fp);
     279              : 
     280          104 :   double pab_mutable[n2][n1];
     281          104 :   char line[100], format[100];
     282         1688 :   for (int i = 0; i < n2; i++) {
     283        46120 :     for (int j = 0; j < n1; j++) {
     284        44536 :       read_next_line(line, sizeof(line), fp);
     285        44536 :       snprintf(format, sizeof(format), "pab %i %i %%lf", i, j);
     286        44536 :       if (sscanf(line, format, &pab_mutable[i][j]) != 1) {
     287            0 :         assert(!"parse_pab failed");
     288              :       }
     289              :     }
     290              :   }
     291          104 :   const double(*pab)[n1] = (const double(*)[n1])pab_mutable;
     292              : 
     293          104 :   const int npts_local_total = npts_local[0] * npts_local[1] * npts_local[2];
     294          104 :   offload_buffer *grid_ref = NULL;
     295          104 :   offload_create_buffer(npts_local_total, &grid_ref);
     296          104 :   memset(grid_ref->host_buffer, 0, npts_local_total * sizeof(double));
     297              : 
     298          104 :   const int ngrid_nonzero = parse_int("ngrid_nonzero", fp);
     299       578328 :   for (int n = 0; n < ngrid_nonzero; n++) {
     300       578224 :     int i, j, k;
     301       578224 :     double value;
     302       578224 :     read_next_line(line, sizeof(line), fp);
     303       578224 :     if (sscanf(line, "grid %i %i %i %le", &i, &j, &k, &value) != 4) {
     304            0 :       assert(!"parse_grid failed");
     305              :     }
     306       578224 :     grid_ref->host_buffer[k * npts_local[1] * npts_local[0] +
     307       578224 :                           j * npts_local[0] + i] = value;
     308              :   }
     309              : 
     310          104 :   double hab_ref[n2][n1];
     311          104 :   memset(hab_ref, 0, n2 * n1 * sizeof(double));
     312          896 :   for (int i = o2; i < ncoset(lb_max) + o2; i++) {
     313        29848 :     for (int j = o1; j < ncoset(la_max) + o1; j++) {
     314        29056 :       read_next_line(line, sizeof(line), fp);
     315        29056 :       snprintf(format, sizeof(format), "hab %i %i %%lf", i, j);
     316        29056 :       if (sscanf(line, format, &hab_ref[i][j]) != 1) {
     317            0 :         assert(!"parse_hab failed");
     318              :       }
     319              :     }
     320              :   }
     321              : 
     322          104 :   double forces_ref[2][3];
     323          104 :   parse_double3("force_a", fp, forces_ref[0]);
     324          104 :   parse_double3("force_b", fp, forces_ref[1]);
     325              : 
     326          104 :   double virial_ref[3][3];
     327          104 :   parse_double3x3("virial", fp, virial_ref);
     328              : 
     329          104 :   char footer_line[100];
     330          104 :   read_next_line(footer_line, sizeof(footer_line), fp);
     331          104 :   if (strcmp(footer_line, "#THE_END\n") != 0) {
     332            0 :     fprintf(stderr, "Error: Wrong footer line.\n");
     333            0 :     abort();
     334              :   }
     335              : 
     336          104 :   if (fclose(fp) != 0) {
     337            0 :     fprintf(stderr, "Could not close task file: %s\n", filename);
     338            0 :     abort();
     339              :   }
     340              : 
     341          104 :   offload_buffer *grid_test = NULL;
     342          104 :   offload_create_buffer(npts_local_total, &grid_test);
     343          104 :   double hab_test[n2][n1];
     344          104 :   double forces_test[2][3];
     345          104 :   double virial_test[3][3];
     346          104 :   double start_time, end_time;
     347              : 
     348          104 :   if (batch) {
     349           52 :     grid_basis_set *basisa = NULL, *basisb = NULL;
     350           52 :     create_dummy_basis_set(n1, la_min, la_max, zeta, &basisa);
     351           52 :     create_dummy_basis_set(n2, lb_min, lb_max, zetb, &basisb);
     352           52 :     grid_task_list *task_list = NULL;
     353           52 :     create_dummy_task_list(
     354              :         orthorhombic, border_mask, ra, rab, radius, basisa, basisb, o1, o2,
     355              :         la_max, lb_max, cycles, cycles_per_block, (const int(*)[3])npts_global,
     356              :         (const int(*)[3])npts_local, (const int(*)[3])shift_local,
     357              :         (const int(*)[3])border_width, (const double(*)[3][3])dh,
     358              :         (const double(*)[3][3])dh_inv, &task_list);
     359           52 :     offload_buffer *pab_blocks = NULL, *hab_blocks = NULL;
     360           52 :     offload_create_buffer(n1 * n2, &pab_blocks);
     361           52 :     offload_create_buffer(n1 * n2, &hab_blocks);
     362           52 :     const double f = (collocate) ? rscale : 1.0;
     363          944 :     for (int i = 0; i < n1; i++) {
     364        23160 :       for (int j = 0; j < n2; j++) {
     365        22268 :         pab_blocks->host_buffer[j * n1 + i] = 0.5 * f * pab[j][i];
     366              :       }
     367              :     }
     368           52 :     start_time = omp_get_wtime();
     369           52 :     const int nlevels = 1;
     370           52 :     const int natoms = 2;
     371           52 :     if (collocate) {
     372              :       // collocate
     373           26 :       offload_buffer *grids[1] = {grid_test};
     374           26 :       grid_collocate_task_list(task_list, func, nlevels,
     375              :                                (const int(*)[3])npts_local, pab_blocks, grids);
     376              :     } else {
     377              :       // integrate
     378           26 :       const offload_buffer *grids[1] = {grid_ref};
     379           26 :       grid_integrate_task_list(task_list, compute_tau, natoms, nlevels,
     380              :                                (const int(*)[3])npts_local, pab_blocks, grids,
     381              :                                hab_blocks, forces_test, virial_test);
     382          422 :       for (int i = 0; i < n2; i++) {
     383        11530 :         for (int j = 0; j < n1; j++) {
     384        11134 :           hab_test[i][j] = hab_blocks->host_buffer[i * n1 + j];
     385              :         }
     386              :       }
     387              :     }
     388           52 :     end_time = omp_get_wtime();
     389           52 :     grid_free_basis_set(basisa);
     390           52 :     grid_free_basis_set(basisb);
     391           52 :     grid_free_task_list(task_list);
     392           52 :     offload_free_buffer(pab_blocks);
     393           52 :     offload_free_buffer(hab_blocks);
     394              :   } else {
     395           52 :     start_time = omp_get_wtime();
     396           52 :     if (collocate) {
     397              :       // collocate
     398           26 :       memset(grid_test->host_buffer, 0, npts_local_total * sizeof(double));
     399           52 :       for (int i = 0; i < cycles; i++) {
     400           26 :         grid_cpu_collocate_pgf_product(
     401              :             orthorhombic, border_mask, func, la_max, la_min, lb_max, lb_min,
     402              :             zeta, zetb, rscale, dh, dh_inv, ra, rab, npts_global, npts_local,
     403              :             shift_local, border_width, radius, o1, o2, n1, n2, pab,
     404           26 :             grid_test->host_buffer);
     405              :       }
     406              :     } else {
     407              :       // integrate
     408           26 :       memset(hab_test, 0, n2 * n1 * sizeof(double));
     409           26 :       memset(forces_test, 0, 2 * 3 * sizeof(double));
     410           26 :       double virials_test[2][3][3] = {0};
     411           52 :       for (int i = 0; i < cycles; i++) {
     412           26 :         grid_cpu_integrate_pgf_product(
     413              :             orthorhombic, compute_tau, border_mask, la_max, la_min, lb_max,
     414              :             lb_min, zeta, zetb, dh, dh_inv, ra, rab, npts_global, npts_local,
     415              :             shift_local, border_width, radius, o1, o2, n1, n2,
     416           26 :             grid_ref->host_buffer, hab_test, pab, forces_test, virials_test,
     417              :             NULL, NULL, NULL);
     418              :       }
     419          104 :       for (int i = 0; i < 3; i++) {
     420          312 :         for (int j = 0; j < 3; j++) {
     421          234 :           virial_test[i][j] = virials_test[0][i][j] + virials_test[1][i][j];
     422              :         }
     423              :       }
     424              :     }
     425           52 :     end_time = omp_get_wtime();
     426              :   }
     427              : 
     428          104 :   double max_value = 0.0;
     429          104 :   double max_rel_diff = 0.0;
     430          104 :   const double derivatives_precision = 1e-4; // account for higher numeric noise
     431          104 :   if (collocate) {
     432              :     // collocate
     433              :     // compare grid
     434      7151464 :     for (int i = 0; i < npts_local_total; i++) {
     435      7151412 :       const double ref_value = cycles * grid_ref->host_buffer[i];
     436      7151412 :       const double test_value = grid_test->host_buffer[i];
     437      7151412 :       const double diff = fabs(test_value - ref_value);
     438      7151412 :       const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     439      7151412 :       max_rel_diff = fmax(max_rel_diff, rel_diff);
     440      7151412 :       max_value = fmax(max_value, fabs(test_value));
     441              :     }
     442              :   } else {
     443              :     // integrate
     444              :     // compare hab
     445          844 :     for (int i = 0; i < n2; i++) {
     446        23060 :       for (int j = 0; j < n1; j++) {
     447        22268 :         const double ref_value = cycles * hab_ref[i][j];
     448        22268 :         const double test_value = hab_test[i][j];
     449        22268 :         const double diff = fabs(test_value - ref_value);
     450        22268 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     451        22268 :         max_rel_diff = fmax(max_rel_diff, rel_diff);
     452        22268 :         max_value = fmax(max_value, fabs(test_value));
     453        22268 :         if (rel_diff > tolerance) {
     454            0 :           printf("hab[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n", i,
     455              :                  j, ref_value, test_value, diff, rel_diff);
     456              :         }
     457              :       }
     458              :     }
     459              :     // compare forces
     460          156 :     for (int i = 0; i < 2; i++) {
     461          416 :       for (int j = 0; j < 3; j++) {
     462          312 :         const double ref_value = cycles * forces_ref[i][j];
     463          312 :         const double test_value = forces_test[i][j];
     464          312 :         const double diff = fabs(test_value - ref_value);
     465          312 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     466          312 :         max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
     467          312 :         if (rel_diff * derivatives_precision > tolerance) {
     468            0 :           printf("forces[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
     469              :                  i, j, ref_value, test_value, diff, rel_diff);
     470              :         }
     471              :       }
     472              :     }
     473              :     // compare virial
     474          208 :     for (int i = 0; i < 3; i++) {
     475          624 :       for (int j = 0; j < 3; j++) {
     476          468 :         const double ref_value = cycles * virial_ref[i][j];
     477          468 :         const double test_value = virial_test[i][j];
     478          468 :         const double diff = fabs(test_value - ref_value);
     479          468 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     480          468 :         max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
     481          468 :         if (rel_diff * derivatives_precision > tolerance) {
     482            0 :           printf("virial[ %i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
     483              :                  i, j, ref_value, test_value, diff, rel_diff);
     484              :         }
     485              :       }
     486              :     }
     487              :   }
     488          104 :   printf("Task: %-55s   %9s %-7s   Cycles: %e   Max value: %le   "
     489              :          "Max rel diff: %le   Time: %le sec\n",
     490              :          filename, collocate ? "Collocate" : "Integrate",
     491          104 :          batch ? "Batched" : "PGF-CPU", (float)cycles, max_value, max_rel_diff,
     492              :          end_time - start_time);
     493              : 
     494          104 :   offload_free_buffer(grid_ref);
     495          104 :   offload_free_buffer(grid_test);
     496              : 
     497              :   // Check floating point exceptions.
     498          104 :   if (fetestexcept(FE_DIVBYZERO) != 0) {
     499            0 :     fprintf(stderr, "Error: Floating point exception FE_DIVBYZERO.\n");
     500            0 :     exit(1);
     501              :   }
     502          104 :   if (fetestexcept(FE_OVERFLOW) != 0) {
     503            0 :     fprintf(stderr, "Error: Floating point exception FE_OVERFLOW.\n");
     504            0 :     exit(1);
     505              :   }
     506              : 
     507          104 :   return max_rel_diff < tolerance;
     508              : }
     509              : 
     510              : // EOF
        

Generated by: LCOV version 2.0-1