LCOV - code coverage report
Current view: top level - src/grid - grid_replay.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:e7e05ae) Lines: 259 279 92.8 %
Date: 2024-04-18 06:59:28 Functions: 10 10 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2024 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <fenv.h>
      10             : #include <limits.h>
      11             : #include <math.h>
      12             : #include <omp.h>
      13             : #include <stdarg.h>
      14             : #include <stdbool.h>
      15             : #include <stdio.h>
      16             : #include <stdlib.h>
      17             : #include <string.h>
      18             : 
      19             : #include "../offload/offload_buffer.h"
      20             : #include "common/grid_common.h"
      21             : #include "grid_replay.h"
      22             : 
      23             : #include "cpu/grid_cpu_collocate.h"
      24             : #include "cpu/grid_cpu_integrate.h"
      25             : #include "grid_task_list.h"
      26             : 
      27             : /*******************************************************************************
      28             :  * \brief Reads next line from given filehandle and handles errors.
      29             :  * \author Ole Schuett
      30             :  ******************************************************************************/
      31      655456 : static void read_next_line(char line[], int length, FILE *fp) {
      32     1310912 :   if (fgets(line, length, fp) == NULL) {
      33           0 :     fprintf(stderr, "Error: Could not read line.\n");
      34           0 :     abort();
      35             :   }
      36      655456 : }
      37             : 
      38             : /*******************************************************************************
      39             :  * \brief Parses next line from file, expecting it to match "${key} ${format}".
      40             :  * \author Ole Schuett
      41             :  ******************************************************************************/
      42      655248 : static void parse_next_line(const char key[], FILE *fp, const char format[],
      43             :                             const int nargs, ...) {
      44      655248 :   char line[100];
      45      655248 :   read_next_line(line, sizeof(line), fp);
      46             : 
      47      655248 :   char full_format[100];
      48      655248 :   strcpy(full_format, key);
      49      655248 :   strcat(full_format, " ");
      50      655248 :   strcat(full_format, format);
      51             : 
      52      655248 :   va_list varargs;
      53      655248 :   va_start(varargs, nargs);
      54      655248 :   if (vsscanf(line, full_format, varargs) != nargs) {
      55           0 :     fprintf(stderr, "Error: Could not parse line.\n");
      56           0 :     fprintf(stderr, "Line: %s\n", line);
      57           0 :     fprintf(stderr, "Format: %s\n", full_format);
      58           0 :     abort();
      59             :   }
      60      655248 :   va_end(varargs);
      61      655248 : }
      62             : 
      63             : /*******************************************************************************
      64             :  * \brief Shorthand for parsing a single integer value.
      65             :  * \author Ole Schuett
      66             :  ******************************************************************************/
      67        1248 : static int parse_int(const char key[], FILE *fp) {
      68        1248 :   int value;
      69        1248 :   parse_next_line(key, fp, "%i", 1, &value);
      70        1248 :   return value;
      71             : }
      72             : 
      73             : /*******************************************************************************
      74             :  * \brief Shorthand for parsing a vector of three integer values.
      75             :  * \author Ole Schuett
      76             :  ******************************************************************************/
      77         416 : static void parse_int3(const char key[], FILE *fp, int vec[3]) {
      78         416 :   parse_next_line(key, fp, "%i %i %i", 3, &vec[0], &vec[1], &vec[2]);
      79         416 : }
      80             : 
      81             : /*******************************************************************************
      82             :  * \brief Shorthand for parsing a single double value.
      83             :  * \author Ole Schuett
      84             :  ******************************************************************************/
      85         416 : static double parse_double(const char key[], FILE *fp) {
      86         416 :   double value;
      87         416 :   parse_next_line(key, fp, "%le", 1, &value);
      88         416 :   return value;
      89             : }
      90             : 
      91             : /*******************************************************************************
      92             :  * \brief Shorthand for parsing a vector of three double values.
      93             :  * \author Ole Schuett
      94             :  ******************************************************************************/
      95         416 : static void parse_double3(const char key[], FILE *fp, double vec[3]) {
      96         416 :   parse_next_line(key, fp, "%le %le %le", 3, &vec[0], &vec[1], &vec[2]);
      97         416 : }
      98             : 
      99             : /*******************************************************************************
     100             :  * \brief Shorthand for parsing a 3x3 matrix of doubles.
     101             :  * \author Ole Schuett
     102             :  ******************************************************************************/
     103         312 : static void parse_double3x3(const char key[], FILE *fp, double mat[3][3]) {
     104         312 :   char format[100];
     105        1248 :   for (int i = 0; i < 3; i++) {
     106         936 :     sprintf(format, "%i %%le %%le %%le", i);
     107         936 :     parse_next_line(key, fp, format, 3, &mat[i][0], &mat[i][1], &mat[i][2]);
     108             :   }
     109         312 : }
     110             : 
     111             : /*******************************************************************************
     112             :  * \brief Creates mock basis set using the identity as decontraction matrix.
     113             :  * \author Ole Schuett
     114             :  ******************************************************************************/
     115         104 : static void create_dummy_basis_set(const int size, const int lmin,
     116             :                                    const int lmax, const double zet,
     117         104 :                                    grid_basis_set **basis_set) {
     118             : 
     119         104 :   double sphi_mutable[size][size];
     120        1788 :   for (int i = 0; i < size; i++) {
     121       47808 :     for (int j = 0; j < size; j++) {
     122       90564 :       sphi_mutable[i][j] = (i == j) ? 1.0 : 0.0; // identity matrix
     123             :     }
     124             :   }
     125         104 :   const double(*sphi)[size] = (const double(*)[size])sphi_mutable;
     126             : 
     127         104 :   const int npgf = size / ncoset(lmax);
     128         104 :   assert(size == npgf * ncoset(lmax));
     129             : 
     130         104 :   const int first_sgf[1] = {1};
     131             : 
     132         104 :   double zet_array_mutable[1][npgf];
     133         808 :   for (int i = 0; i < npgf; i++) {
     134         704 :     zet_array_mutable[0][i] = zet;
     135             :   }
     136         104 :   const double(*zet_array)[npgf] = (const double(*)[npgf])zet_array_mutable;
     137             : 
     138         104 :   grid_create_basis_set(/*nset=*/1,
     139             :                         /*nsgf=*/size,
     140             :                         /*maxco=*/size,
     141             :                         /*maxpgf=*/npgf,
     142             :                         /*lmin=*/&lmin,
     143             :                         /*lmax=*/&lmax,
     144             :                         /*npgf=*/&npgf,
     145             :                         /*nsgf_set=*/&size,
     146             :                         /*first_sgf=*/first_sgf,
     147             :                         /*sphi=*/sphi,
     148             :                         /*zet=*/zet_array, basis_set);
     149         104 : }
     150             : 
     151             : /*******************************************************************************
     152             :  * \brief Creates mock task list with one task per cycle.
     153             :  * \author Ole Schuett
     154             :  ******************************************************************************/
     155          52 : static void create_dummy_task_list(
     156             :     const bool orthorhombic, const int border_mask, const double ra[3],
     157             :     const double rab[3], const double radius, const grid_basis_set *basis_set_a,
     158             :     const grid_basis_set *basis_set_b, const int o1, const int o2,
     159             :     const int la_max, const int lb_max, const int cycles,
     160             :     const int cycles_per_block, const int npts_global[][3],
     161             :     const int npts_local[][3], const int shift_local[][3],
     162             :     const int border_width[][3], const double dh[][3][3],
     163          52 :     const double dh_inv[][3][3], grid_task_list **task_list) {
     164             : 
     165          52 :   const int ntasks = cycles;
     166          52 :   const int nlevels = 1;
     167          52 :   const int natoms = 2;
     168          52 :   const int nkinds = 2;
     169          52 :   int nblocks = cycles / cycles_per_block + 1;
     170             : 
     171             :   /* we can not have more blocks than the number of tasks */
     172          52 :   if (cycles == 1) {
     173          52 :     nblocks = 1;
     174             :   }
     175             : 
     176          52 :   int block_offsets[nblocks];
     177          52 :   memset(block_offsets, 0, nblocks * sizeof(int)); // all point to same data
     178          52 :   const double atom_positions[2][3] = {
     179          52 :       {ra[0], ra[1], ra[2]}, {rab[0] + ra[0], rab[1] + ra[1], rab[2] + ra[2]}};
     180          52 :   const int atom_kinds[2] = {1, 2};
     181          52 :   const grid_basis_set *basis_sets[2] = {basis_set_a, basis_set_b};
     182          52 :   const int ipgf = o1 / ncoset(la_max) + 1;
     183          52 :   const int jpgf = o2 / ncoset(lb_max) + 1;
     184          52 :   assert(o1 == (ipgf - 1) * ncoset(la_max));
     185          52 :   assert(o2 == (jpgf - 1) * ncoset(lb_max));
     186             : 
     187          52 :   int level_list[ntasks], iatom_list[ntasks], jatom_list[ntasks];
     188          52 :   int iset_list[ntasks], jset_list[ntasks], ipgf_list[ntasks],
     189          52 :       jpgf_list[ntasks];
     190          52 :   int border_mask_list[ntasks], block_num_list[ntasks];
     191          52 :   double radius_list[ntasks], rab_list_mutable[ntasks][3];
     192         104 :   for (int i = 0; i < cycles; i++) {
     193          52 :     level_list[i] = 1;
     194          52 :     iatom_list[i] = 1;
     195          52 :     jatom_list[i] = 2;
     196          52 :     iset_list[i] = 1;
     197          52 :     jset_list[i] = 1;
     198          52 :     ipgf_list[i] = ipgf;
     199          52 :     jpgf_list[i] = jpgf;
     200          52 :     border_mask_list[i] = border_mask;
     201          52 :     block_num_list[i] = i / cycles_per_block + 1;
     202          52 :     radius_list[i] = radius;
     203          52 :     rab_list_mutable[i][0] = rab[0];
     204          52 :     rab_list_mutable[i][1] = rab[1];
     205          52 :     rab_list_mutable[i][2] = rab[2];
     206             :   }
     207          52 :   const double(*rab_list)[3] = (const double(*)[3])rab_list_mutable;
     208             : 
     209          52 :   grid_create_task_list(
     210             :       orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     211             :       atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     212             :       jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
     213             :       block_num_list, radius_list, rab_list, npts_global, npts_local,
     214             :       shift_local, border_width, dh, dh_inv, task_list);
     215          52 : }
     216             : 
     217             : /*******************************************************************************
     218             :  * \brief Reads a .task file, collocates/integrates it, and compares results.
     219             :  *        See grid_replay.h for details.
     220             :  * \author Ole Schuett
     221             :  ******************************************************************************/
     222         104 : bool grid_replay(const char *filename, const int cycles, const bool collocate,
     223             :                  const bool batch, const int cycles_per_block,
     224         104 :                  const double tolerance) {
     225             : 
     226         104 :   if (cycles < 1) {
     227           0 :     fprintf(stderr, "Error: Cycles have to be greater than zero.\n");
     228           0 :     exit(1);
     229             :   }
     230             : 
     231         104 :   if (cycles_per_block < 1 || cycles_per_block > cycles) {
     232           0 :     fprintf(stderr,
     233             :             "Error: Cycles per block has to be between 1 and cycles.\n");
     234           0 :     exit(1);
     235             :   }
     236             : 
     237         104 :   FILE *fp = fopen(filename, "r");
     238         104 :   if (fp == NULL) {
     239           0 :     fprintf(stderr, "Could not open task file: %s\n", filename);
     240           0 :     exit(1);
     241             :   }
     242             : 
     243         104 :   char header_line[100];
     244         104 :   read_next_line(header_line, sizeof(header_line), fp);
     245         104 :   if (strcmp(header_line, "#Grid task v10\n") != 0) {
     246           0 :     fprintf(stderr, "Error: Wrong file header.\n");
     247           0 :     abort();
     248             :   }
     249             : 
     250         104 :   const bool orthorhombic = parse_int("orthorhombic", fp);
     251         104 :   const int border_mask = parse_int("border_mask", fp);
     252         104 :   const enum grid_func func = (enum grid_func)parse_int("func", fp);
     253         104 :   const bool compute_tau = (func == GRID_FUNC_DADB);
     254         104 :   const int la_max = parse_int("la_max", fp);
     255         104 :   const int la_min = parse_int("la_min", fp);
     256         104 :   const int lb_max = parse_int("lb_max", fp);
     257         104 :   const int lb_min = parse_int("lb_min", fp);
     258         104 :   const double zeta = parse_double("zeta", fp);
     259         104 :   const double zetb = parse_double("zetb", fp);
     260         104 :   const double rscale = parse_double("rscale", fp);
     261             : 
     262         104 :   double dh_mutable[3][3], dh_inv_mutable[3][3], ra[3], rab[3];
     263         104 :   parse_double3x3("dh", fp, dh_mutable);
     264         104 :   parse_double3x3("dh_inv", fp, dh_inv_mutable);
     265         104 :   parse_double3("ra", fp, ra);
     266         104 :   parse_double3("rab", fp, rab);
     267         104 :   const double(*dh)[3] = (const double(*)[3])dh_mutable;
     268         104 :   const double(*dh_inv)[3] = (const double(*)[3])dh_inv_mutable;
     269             : 
     270         104 :   int npts_global[3], npts_local[3], shift_local[3], border_width[3];
     271         104 :   parse_int3("npts_global", fp, npts_global);
     272         104 :   parse_int3("npts_local", fp, npts_local);
     273         104 :   parse_int3("shift_local", fp, shift_local);
     274         104 :   parse_int3("border_width", fp, border_width);
     275             : 
     276         104 :   const double radius = parse_double("radius", fp);
     277         104 :   const int o1 = parse_int("o1", fp);
     278         104 :   const int o2 = parse_int("o2", fp);
     279         104 :   const int n1 = parse_int("n1", fp);
     280         104 :   const int n2 = parse_int("n2", fp);
     281             : 
     282         104 :   double pab_mutable[n2][n1];
     283         104 :   char format[100];
     284        1688 :   for (int i = 0; i < n2; i++) {
     285       46120 :     for (int j = 0; j < n1; j++) {
     286       44536 :       sprintf(format, "%i %i %%le", i, j);
     287       44536 :       parse_next_line("pab", fp, format, 1, &pab_mutable[i][j]);
     288             :     }
     289             :   }
     290         104 :   const double(*pab)[n1] = (const double(*)[n1])pab_mutable;
     291             : 
     292         104 :   const int npts_local_total = npts_local[0] * npts_local[1] * npts_local[2];
     293         104 :   offload_buffer *grid_ref = NULL;
     294         104 :   offload_create_buffer(npts_local_total, &grid_ref);
     295         104 :   memset(grid_ref->host_buffer, 0, npts_local_total * sizeof(double));
     296             : 
     297         104 :   const int ngrid_nonzero = parse_int("ngrid_nonzero", fp);
     298      578328 :   for (int n = 0; n < ngrid_nonzero; n++) {
     299      578224 :     int i, j, k;
     300      578224 :     double value;
     301      578224 :     parse_next_line("grid", fp, "%i %i %i %le", 4, &i, &j, &k, &value);
     302      578224 :     grid_ref->host_buffer[k * npts_local[1] * npts_local[0] +
     303      578224 :                           j * npts_local[0] + i] = value;
     304             :   }
     305             : 
     306         104 :   double hab_ref[n2][n1];
     307         104 :   memset(hab_ref, 0, n2 * n1 * sizeof(double));
     308         896 :   for (int i = o2; i < ncoset(lb_max) + o2; i++) {
     309       29848 :     for (int j = o1; j < ncoset(la_max) + o1; j++) {
     310       29056 :       sprintf(format, "%i %i %%le", i, j);
     311       29056 :       parse_next_line("hab", fp, format, 1, &hab_ref[i][j]);
     312             :     }
     313             :   }
     314             : 
     315         104 :   double forces_ref[2][3];
     316         104 :   parse_double3("force_a", fp, forces_ref[0]);
     317         104 :   parse_double3("force_b", fp, forces_ref[1]);
     318             : 
     319         104 :   double virial_ref[3][3];
     320         104 :   parse_double3x3("virial", fp, virial_ref);
     321             : 
     322         104 :   char footer_line[100];
     323         104 :   read_next_line(footer_line, sizeof(footer_line), fp);
     324         104 :   if (strcmp(footer_line, "#THE_END\n") != 0) {
     325           0 :     fprintf(stderr, "Error: Wrong footer line.\n");
     326           0 :     abort();
     327             :   }
     328             : 
     329         104 :   offload_buffer *grid_test = NULL;
     330         104 :   offload_create_buffer(npts_local_total, &grid_test);
     331         104 :   double hab_test[n2][n1];
     332         104 :   double forces_test[2][3];
     333         104 :   double virial_test[3][3];
     334         104 :   double start_time, end_time;
     335             : 
     336         104 :   if (batch) {
     337          52 :     grid_basis_set *basisa = NULL, *basisb = NULL;
     338          52 :     create_dummy_basis_set(n1, la_min, la_max, zeta, &basisa);
     339          52 :     create_dummy_basis_set(n2, lb_min, lb_max, zetb, &basisb);
     340          52 :     grid_task_list *task_list = NULL;
     341          52 :     create_dummy_task_list(
     342             :         orthorhombic, border_mask, ra, rab, radius, basisa, basisb, o1, o2,
     343             :         la_max, lb_max, cycles, cycles_per_block, (const int(*)[3])npts_global,
     344             :         (const int(*)[3])npts_local, (const int(*)[3])shift_local,
     345             :         (const int(*)[3])border_width, (const double(*)[3][3])dh,
     346             :         (const double(*)[3][3])dh_inv, &task_list);
     347          52 :     offload_buffer *pab_blocks = NULL, *hab_blocks = NULL;
     348          52 :     offload_create_buffer(n1 * n2, &pab_blocks);
     349          52 :     offload_create_buffer(n1 * n2, &hab_blocks);
     350          52 :     const double f = (collocate) ? rscale : 1.0;
     351         944 :     for (int i = 0; i < n1; i++) {
     352       23160 :       for (int j = 0; j < n2; j++) {
     353       22268 :         pab_blocks->host_buffer[j * n1 + i] = 0.5 * f * pab[j][i];
     354             :       }
     355             :     }
     356          52 :     start_time = omp_get_wtime();
     357          52 :     const int nlevels = 1;
     358          52 :     const int natoms = 2;
     359          52 :     if (collocate) {
     360             :       // collocate
     361          26 :       offload_buffer *grids[1] = {grid_test};
     362          26 :       grid_collocate_task_list(task_list, func, nlevels,
     363             :                                (const int(*)[3])npts_local, pab_blocks, grids);
     364             :     } else {
     365             :       // integrate
     366          26 :       const offload_buffer *grids[1] = {grid_ref};
     367          26 :       grid_integrate_task_list(task_list, compute_tau, natoms, nlevels,
     368             :                                (const int(*)[3])npts_local, pab_blocks, grids,
     369             :                                hab_blocks, forces_test, virial_test);
     370         422 :       for (int i = 0; i < n2; i++) {
     371       11530 :         for (int j = 0; j < n1; j++) {
     372       11134 :           hab_test[i][j] = hab_blocks->host_buffer[i * n1 + j];
     373             :         }
     374             :       }
     375             :     }
     376          52 :     end_time = omp_get_wtime();
     377          52 :     grid_free_basis_set(basisa);
     378          52 :     grid_free_basis_set(basisb);
     379          52 :     grid_free_task_list(task_list);
     380          52 :     offload_free_buffer(pab_blocks);
     381          52 :     offload_free_buffer(hab_blocks);
     382             :   } else {
     383          52 :     start_time = omp_get_wtime();
     384          52 :     if (collocate) {
     385             :       // collocate
     386          26 :       memset(grid_test->host_buffer, 0, npts_local_total * sizeof(double));
     387          52 :       for (int i = 0; i < cycles; i++) {
     388          26 :         grid_cpu_collocate_pgf_product(
     389             :             orthorhombic, border_mask, func, la_max, la_min, lb_max, lb_min,
     390             :             zeta, zetb, rscale, dh, dh_inv, ra, rab, npts_global, npts_local,
     391             :             shift_local, border_width, radius, o1, o2, n1, n2, pab,
     392          26 :             grid_test->host_buffer);
     393             :       }
     394             :     } else {
     395             :       // integrate
     396          26 :       memset(hab_test, 0, n2 * n1 * sizeof(double));
     397          26 :       memset(forces_test, 0, 2 * 3 * sizeof(double));
     398          26 :       double virials_test[2][3][3] = {0};
     399          52 :       for (int i = 0; i < cycles; i++) {
     400          26 :         grid_cpu_integrate_pgf_product(
     401             :             orthorhombic, compute_tau, border_mask, la_max, la_min, lb_max,
     402             :             lb_min, zeta, zetb, dh, dh_inv, ra, rab, npts_global, npts_local,
     403             :             shift_local, border_width, radius, o1, o2, n1, n2,
     404          26 :             grid_ref->host_buffer, hab_test, pab, forces_test, virials_test,
     405             :             NULL, NULL, NULL);
     406             :       }
     407         104 :       for (int i = 0; i < 3; i++) {
     408         312 :         for (int j = 0; j < 3; j++) {
     409         234 :           virial_test[i][j] = virials_test[0][i][j] + virials_test[1][i][j];
     410             :         }
     411             :       }
     412             :     }
     413          52 :     end_time = omp_get_wtime();
     414             :   }
     415             : 
     416         104 :   double max_value = 0.0;
     417         104 :   double max_rel_diff = 0.0;
     418         104 :   const double derivatives_precision = 1e-4; // account for higher numeric noise
     419         104 :   if (collocate) {
     420             :     // collocate
     421             :     // compare grid
     422     7151464 :     for (int i = 0; i < npts_local_total; i++) {
     423     7151412 :       const double ref_value = cycles * grid_ref->host_buffer[i];
     424     7151412 :       const double test_value = grid_test->host_buffer[i];
     425     7151412 :       const double diff = fabs(test_value - ref_value);
     426     7151412 :       const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     427     7151412 :       max_rel_diff = fmax(max_rel_diff, rel_diff);
     428     7151412 :       max_value = fmax(max_value, fabs(test_value));
     429             :     }
     430             :   } else {
     431             :     // integrate
     432             :     // compare hab
     433         844 :     for (int i = 0; i < n2; i++) {
     434       23060 :       for (int j = 0; j < n1; j++) {
     435       22268 :         const double ref_value = cycles * hab_ref[i][j];
     436       22268 :         const double test_value = hab_test[i][j];
     437       22268 :         const double diff = fabs(test_value - ref_value);
     438       22268 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     439       22268 :         max_rel_diff = fmax(max_rel_diff, rel_diff);
     440       22268 :         max_value = fmax(max_value, fabs(test_value));
     441       22268 :         if (rel_diff > tolerance) {
     442       22268 :           printf("hab[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n", i,
     443             :                  j, ref_value, test_value, diff, rel_diff);
     444             :         }
     445             :       }
     446             :     }
     447             :     // compare forces
     448         156 :     for (int i = 0; i < 2; i++) {
     449         416 :       for (int j = 0; j < 3; j++) {
     450         312 :         const double ref_value = cycles * forces_ref[i][j];
     451         312 :         const double test_value = forces_test[i][j];
     452         312 :         const double diff = fabs(test_value - ref_value);
     453         312 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     454         312 :         max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
     455         312 :         if (rel_diff * derivatives_precision > tolerance) {
     456         312 :           printf("forces[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
     457             :                  i, j, ref_value, test_value, diff, rel_diff);
     458             :         }
     459             :       }
     460             :     }
     461             :     // compare virial
     462         208 :     for (int i = 0; i < 3; i++) {
     463         624 :       for (int j = 0; j < 3; j++) {
     464         468 :         const double ref_value = cycles * virial_ref[i][j];
     465         468 :         const double test_value = virial_test[i][j];
     466         468 :         const double diff = fabs(test_value - ref_value);
     467         468 :         const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     468         468 :         max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
     469         468 :         if (rel_diff * derivatives_precision > tolerance) {
     470         468 :           printf("virial[ %i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
     471             :                  i, j, ref_value, test_value, diff, rel_diff);
     472             :         }
     473             :       }
     474             :     }
     475             :   }
     476         104 :   printf("Task: %-55s   %9s %-7s   Cycles: %e   Max value: %le   "
     477             :          "Max rel diff: %le   Time: %le sec\n",
     478             :          filename, collocate ? "Collocate" : "Integrate",
     479         104 :          batch ? "Batched" : "PGF-CPU", (float)cycles, max_value, max_rel_diff,
     480             :          end_time - start_time);
     481             : 
     482         104 :   offload_free_buffer(grid_ref);
     483         104 :   offload_free_buffer(grid_test);
     484             : 
     485             :   // Check floating point exceptions.
     486         104 :   if (fetestexcept(FE_DIVBYZERO) != 0) {
     487           0 :     fprintf(stderr, "Error: Floating point exception FE_DIVBYZERO.\n");
     488           0 :     exit(1);
     489             :   }
     490         104 :   if (fetestexcept(FE_OVERFLOW) != 0) {
     491           0 :     fprintf(stderr, "Error: Floating point exception FE_OVERFLOW.\n");
     492           0 :     exit(1);
     493             :   }
     494             : 
     495         104 :   return max_rel_diff < tolerance;
     496             : }
     497             : 
     498             : // EOF

Generated by: LCOV version 1.15