Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <fenv.h>
10 : #include <limits.h>
11 : #include <math.h>
12 : #include <omp.h>
13 : #include <stdarg.h>
14 : #include <stdbool.h>
15 : #include <stdio.h>
16 : #include <stdlib.h>
17 : #include <string.h>
18 :
19 : #include "../offload/offload_buffer.h"
20 : #include "common/grid_common.h"
21 : #include "grid_replay.h"
22 :
23 : #include "cpu/grid_cpu_collocate.h"
24 : #include "cpu/grid_cpu_integrate.h"
25 : #include "grid_task_list.h"
26 :
27 : /*******************************************************************************
28 : * \brief Reads next line from given filehandle and handles errors.
29 : * \author Ole Schuett
30 : ******************************************************************************/
31 655456 : static void read_next_line(char line[], int length, FILE *fp) {
32 1310912 : if (fgets(line, length, fp) == NULL) {
33 0 : fprintf(stderr, "Error: Could not read line.\n");
34 0 : abort();
35 : }
36 655456 : }
37 :
38 : /*******************************************************************************
39 : * \brief Shorthand for parsing a single integer value.
40 : * \author Ole Schuett
41 : ******************************************************************************/
42 1248 : static int parse_int(const char key[], FILE *fp) {
43 1248 : int value;
44 1248 : char line[100], format[100];
45 1248 : read_next_line(line, sizeof(line), fp);
46 1248 : snprintf(format, sizeof(format), "%s %%i", key);
47 1248 : if (sscanf(line, format, &value) != 1) {
48 0 : assert(!"parse_int failed");
49 : }
50 1248 : return value;
51 : }
52 :
53 : /*******************************************************************************
54 : * \brief Shorthand for parsing a vector of three integer values.
55 : * \author Ole Schuett
56 : ******************************************************************************/
57 416 : static void parse_int3(const char key[], FILE *fp, int vec[3]) {
58 416 : char line[100], format[100];
59 416 : read_next_line(line, sizeof(line), fp);
60 416 : snprintf(format, sizeof(format), "%s %%i %%i %%i", key);
61 416 : if (sscanf(line, format, &vec[0], &vec[1], &vec[2]) != 3) {
62 0 : assert(!"parse_int3 failed");
63 : }
64 416 : }
65 :
66 : /*******************************************************************************
67 : * \brief Shorthand for parsing a single double value.
68 : * \author Ole Schuett
69 : ******************************************************************************/
70 416 : static double parse_double(const char key[], FILE *fp) {
71 416 : double value;
72 416 : char line[100], format[100];
73 416 : read_next_line(line, sizeof(line), fp);
74 416 : snprintf(format, sizeof(format), "%s %%lf", key);
75 416 : if (sscanf(line, format, &value) != 1) {
76 0 : assert(!"parse_double failed");
77 : }
78 416 : return value;
79 : }
80 :
81 : /*******************************************************************************
82 : * \brief Shorthand for parsing a vector of three double values.
83 : * \author Ole Schuett
84 : ******************************************************************************/
85 416 : static void parse_double3(const char key[], FILE *fp, double vec[3]) {
86 416 : char line[100], format[100];
87 416 : read_next_line(line, sizeof(line), fp);
88 416 : snprintf(format, sizeof(format), "%s %%lf %%lf %%lf", key);
89 416 : if (sscanf(line, format, &vec[0], &vec[1], &vec[2]) != 3) {
90 0 : assert(!"parse_double3 failed");
91 : }
92 416 : }
93 :
94 : /*******************************************************************************
95 : * \brief Shorthand for parsing a 3x3 matrix of doubles.
96 : * \author Ole Schuett
97 : ******************************************************************************/
98 312 : static void parse_double3x3(const char key[], FILE *fp, double mat[3][3]) {
99 312 : char line[100], format[100];
100 1248 : for (int i = 0; i < 3; i++) {
101 936 : read_next_line(line, sizeof(line), fp);
102 936 : snprintf(format, sizeof(format), "%s %i %%lf %%lf %%lf", key, i);
103 936 : if (sscanf(line, format, &mat[i][0], &mat[i][1], &mat[i][2]) != 3) {
104 0 : assert(!"parse_double3x3 failed");
105 : }
106 : }
107 312 : }
108 :
109 : /*******************************************************************************
110 : * \brief Creates mock basis set using the identity as decontraction matrix.
111 : * \author Ole Schuett
112 : ******************************************************************************/
113 104 : static void create_dummy_basis_set(const int size, const int lmin,
114 : const int lmax, const double zet,
115 104 : grid_basis_set **basis_set) {
116 :
117 104 : double sphi_mutable[size][size];
118 1788 : for (int i = 0; i < size; i++) {
119 47808 : for (int j = 0; j < size; j++) {
120 90564 : sphi_mutable[i][j] = (i == j) ? 1.0 : 0.0; // identity matrix
121 : }
122 : }
123 104 : const double(*sphi)[size] = (const double(*)[size])sphi_mutable;
124 :
125 104 : const int npgf = size / ncoset(lmax);
126 104 : assert(size == npgf * ncoset(lmax));
127 :
128 104 : const int first_sgf[1] = {1};
129 :
130 104 : double zet_array_mutable[1][npgf];
131 808 : for (int i = 0; i < npgf; i++) {
132 704 : zet_array_mutable[0][i] = zet;
133 : }
134 104 : const double(*zet_array)[npgf] = (const double(*)[npgf])zet_array_mutable;
135 :
136 104 : grid_create_basis_set(/*nset=*/1,
137 : /*nsgf=*/size,
138 : /*maxco=*/size,
139 : /*maxpgf=*/npgf,
140 : /*lmin=*/&lmin,
141 : /*lmax=*/&lmax,
142 : /*npgf=*/&npgf,
143 : /*nsgf_set=*/&size,
144 : /*first_sgf=*/first_sgf,
145 : /*sphi=*/sphi,
146 : /*zet=*/zet_array, basis_set);
147 104 : }
148 :
149 : /*******************************************************************************
150 : * \brief Creates mock task list with one task per cycle.
151 : * \author Ole Schuett
152 : ******************************************************************************/
153 52 : static void create_dummy_task_list(
154 : const bool orthorhombic, const int border_mask, const double ra[3],
155 : const double rab[3], const double radius, const grid_basis_set *basis_set_a,
156 : const grid_basis_set *basis_set_b, const int o1, const int o2,
157 : const int la_max, const int lb_max, const int cycles,
158 : const int cycles_per_block, const int npts_global[][3],
159 : const int npts_local[][3], const int shift_local[][3],
160 : const int border_width[][3], const double dh[][3][3],
161 52 : const double dh_inv[][3][3], grid_task_list **task_list) {
162 :
163 52 : const int ntasks = cycles;
164 52 : const int nlevels = 1;
165 52 : const int natoms = 2;
166 52 : const int nkinds = 2;
167 52 : int nblocks = cycles / cycles_per_block + 1;
168 :
169 : /* we can not have more blocks than the number of tasks */
170 52 : if (cycles == 1) {
171 52 : nblocks = 1;
172 : }
173 :
174 52 : int block_offsets[nblocks];
175 52 : memset(block_offsets, 0, nblocks * sizeof(int)); // all point to same data
176 52 : const double atom_positions[2][3] = {
177 52 : {ra[0], ra[1], ra[2]}, {rab[0] + ra[0], rab[1] + ra[1], rab[2] + ra[2]}};
178 52 : const int atom_kinds[2] = {1, 2};
179 52 : const grid_basis_set *basis_sets[2] = {basis_set_a, basis_set_b};
180 52 : const int ipgf = o1 / ncoset(la_max) + 1;
181 52 : const int jpgf = o2 / ncoset(lb_max) + 1;
182 52 : assert(o1 == (ipgf - 1) * ncoset(la_max));
183 52 : assert(o2 == (jpgf - 1) * ncoset(lb_max));
184 :
185 52 : int level_list[ntasks], iatom_list[ntasks], jatom_list[ntasks];
186 52 : int iset_list[ntasks], jset_list[ntasks], ipgf_list[ntasks],
187 52 : jpgf_list[ntasks];
188 52 : int border_mask_list[ntasks], block_num_list[ntasks];
189 52 : double radius_list[ntasks], rab_list_mutable[ntasks][3];
190 104 : for (int i = 0; i < cycles; i++) {
191 52 : level_list[i] = 1;
192 52 : iatom_list[i] = 1;
193 52 : jatom_list[i] = 2;
194 52 : iset_list[i] = 1;
195 52 : jset_list[i] = 1;
196 52 : ipgf_list[i] = ipgf;
197 52 : jpgf_list[i] = jpgf;
198 52 : border_mask_list[i] = border_mask;
199 52 : block_num_list[i] = i / cycles_per_block + 1;
200 52 : radius_list[i] = radius;
201 52 : rab_list_mutable[i][0] = rab[0];
202 52 : rab_list_mutable[i][1] = rab[1];
203 52 : rab_list_mutable[i][2] = rab[2];
204 : }
205 52 : const double(*rab_list)[3] = (const double(*)[3])rab_list_mutable;
206 :
207 52 : grid_create_task_list(
208 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
209 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
210 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
211 : block_num_list, radius_list, rab_list, npts_global, npts_local,
212 : shift_local, border_width, dh, dh_inv, task_list);
213 52 : }
214 :
215 : /*******************************************************************************
216 : * \brief Reads a .task file, collocates/integrates it, and compares results.
217 : * See grid_replay.h for details.
218 : * \author Ole Schuett
219 : ******************************************************************************/
220 104 : bool grid_replay(const char *filename, const int cycles, const bool collocate,
221 : const bool batch, const int cycles_per_block,
222 104 : const double tolerance) {
223 :
224 104 : if (cycles < 1) {
225 0 : fprintf(stderr, "Error: Cycles have to be greater than zero.\n");
226 0 : exit(1);
227 : }
228 :
229 104 : if (cycles_per_block < 1 || cycles_per_block > cycles) {
230 0 : fprintf(stderr,
231 : "Error: Cycles per block has to be between 1 and cycles.\n");
232 0 : exit(1);
233 : }
234 :
235 104 : FILE *fp = fopen(filename, "r");
236 104 : if (fp == NULL) {
237 0 : fprintf(stderr, "Could not open task file: %s\n", filename);
238 0 : exit(1);
239 : }
240 :
241 104 : char header_line[100];
242 104 : read_next_line(header_line, sizeof(header_line), fp);
243 104 : if (strcmp(header_line, "#Grid task v10\n") != 0) {
244 0 : fprintf(stderr, "Error: Wrong file header.\n");
245 0 : abort();
246 : }
247 :
248 104 : const bool orthorhombic = parse_int("orthorhombic", fp);
249 104 : const int border_mask = parse_int("border_mask", fp);
250 104 : const enum grid_func func = (enum grid_func)parse_int("func", fp);
251 104 : const bool compute_tau = (func == GRID_FUNC_DADB);
252 104 : const int la_max = parse_int("la_max", fp);
253 104 : const int la_min = parse_int("la_min", fp);
254 104 : const int lb_max = parse_int("lb_max", fp);
255 104 : const int lb_min = parse_int("lb_min", fp);
256 104 : const double zeta = parse_double("zeta", fp);
257 104 : const double zetb = parse_double("zetb", fp);
258 104 : const double rscale = parse_double("rscale", fp);
259 :
260 104 : double dh_mutable[3][3], dh_inv_mutable[3][3], ra[3], rab[3];
261 104 : parse_double3x3("dh", fp, dh_mutable);
262 104 : parse_double3x3("dh_inv", fp, dh_inv_mutable);
263 104 : parse_double3("ra", fp, ra);
264 104 : parse_double3("rab", fp, rab);
265 104 : const double(*dh)[3] = (const double(*)[3])dh_mutable;
266 104 : const double(*dh_inv)[3] = (const double(*)[3])dh_inv_mutable;
267 :
268 104 : int npts_global[3], npts_local[3], shift_local[3], border_width[3];
269 104 : parse_int3("npts_global", fp, npts_global);
270 104 : parse_int3("npts_local", fp, npts_local);
271 104 : parse_int3("shift_local", fp, shift_local);
272 104 : parse_int3("border_width", fp, border_width);
273 :
274 104 : const double radius = parse_double("radius", fp);
275 104 : const int o1 = parse_int("o1", fp);
276 104 : const int o2 = parse_int("o2", fp);
277 104 : const int n1 = parse_int("n1", fp);
278 104 : const int n2 = parse_int("n2", fp);
279 :
280 104 : double pab_mutable[n2][n1];
281 104 : char line[100], format[100];
282 1688 : for (int i = 0; i < n2; i++) {
283 46120 : for (int j = 0; j < n1; j++) {
284 44536 : read_next_line(line, sizeof(line), fp);
285 44536 : snprintf(format, sizeof(format), "pab %i %i %%lf", i, j);
286 44536 : if (sscanf(line, format, &pab_mutable[i][j]) != 1) {
287 0 : assert(!"parse_pab failed");
288 : }
289 : }
290 : }
291 104 : const double(*pab)[n1] = (const double(*)[n1])pab_mutable;
292 :
293 104 : const int npts_local_total = npts_local[0] * npts_local[1] * npts_local[2];
294 104 : offload_buffer *grid_ref = NULL;
295 104 : offload_create_buffer(npts_local_total, &grid_ref);
296 104 : memset(grid_ref->host_buffer, 0, npts_local_total * sizeof(double));
297 :
298 104 : const int ngrid_nonzero = parse_int("ngrid_nonzero", fp);
299 578328 : for (int n = 0; n < ngrid_nonzero; n++) {
300 578224 : int i, j, k;
301 578224 : double value;
302 578224 : read_next_line(line, sizeof(line), fp);
303 578224 : if (sscanf(line, "grid %i %i %i %le", &i, &j, &k, &value) != 4) {
304 0 : assert(!"parse_grid failed");
305 : }
306 578224 : grid_ref->host_buffer[k * npts_local[1] * npts_local[0] +
307 578224 : j * npts_local[0] + i] = value;
308 : }
309 :
310 104 : double hab_ref[n2][n1];
311 104 : memset(hab_ref, 0, n2 * n1 * sizeof(double));
312 896 : for (int i = o2; i < ncoset(lb_max) + o2; i++) {
313 29848 : for (int j = o1; j < ncoset(la_max) + o1; j++) {
314 29056 : read_next_line(line, sizeof(line), fp);
315 29056 : snprintf(format, sizeof(format), "hab %i %i %%lf", i, j);
316 29056 : if (sscanf(line, format, &hab_ref[i][j]) != 1) {
317 0 : assert(!"parse_hab failed");
318 : }
319 : }
320 : }
321 :
322 104 : double forces_ref[2][3];
323 104 : parse_double3("force_a", fp, forces_ref[0]);
324 104 : parse_double3("force_b", fp, forces_ref[1]);
325 :
326 104 : double virial_ref[3][3];
327 104 : parse_double3x3("virial", fp, virial_ref);
328 :
329 104 : char footer_line[100];
330 104 : read_next_line(footer_line, sizeof(footer_line), fp);
331 104 : if (strcmp(footer_line, "#THE_END\n") != 0) {
332 0 : fprintf(stderr, "Error: Wrong footer line.\n");
333 0 : abort();
334 : }
335 :
336 104 : if (fclose(fp) != 0) {
337 0 : fprintf(stderr, "Could not close task file: %s\n", filename);
338 0 : abort();
339 : }
340 :
341 104 : offload_buffer *grid_test = NULL;
342 104 : offload_create_buffer(npts_local_total, &grid_test);
343 104 : double hab_test[n2][n1];
344 104 : double forces_test[2][3];
345 104 : double virial_test[3][3];
346 104 : double start_time, end_time;
347 :
348 104 : if (batch) {
349 52 : grid_basis_set *basisa = NULL, *basisb = NULL;
350 52 : create_dummy_basis_set(n1, la_min, la_max, zeta, &basisa);
351 52 : create_dummy_basis_set(n2, lb_min, lb_max, zetb, &basisb);
352 52 : grid_task_list *task_list = NULL;
353 52 : create_dummy_task_list(
354 : orthorhombic, border_mask, ra, rab, radius, basisa, basisb, o1, o2,
355 : la_max, lb_max, cycles, cycles_per_block, (const int(*)[3])npts_global,
356 : (const int(*)[3])npts_local, (const int(*)[3])shift_local,
357 : (const int(*)[3])border_width, (const double(*)[3][3])dh,
358 : (const double(*)[3][3])dh_inv, &task_list);
359 52 : offload_buffer *pab_blocks = NULL, *hab_blocks = NULL;
360 52 : offload_create_buffer(n1 * n2, &pab_blocks);
361 52 : offload_create_buffer(n1 * n2, &hab_blocks);
362 52 : const double f = (collocate) ? rscale : 1.0;
363 944 : for (int i = 0; i < n1; i++) {
364 23160 : for (int j = 0; j < n2; j++) {
365 22268 : pab_blocks->host_buffer[j * n1 + i] = 0.5 * f * pab[j][i];
366 : }
367 : }
368 52 : start_time = omp_get_wtime();
369 52 : const int nlevels = 1;
370 52 : const int natoms = 2;
371 52 : if (collocate) {
372 : // collocate
373 26 : offload_buffer *grids[1] = {grid_test};
374 26 : grid_collocate_task_list(task_list, func, nlevels,
375 : (const int(*)[3])npts_local, pab_blocks, grids);
376 : } else {
377 : // integrate
378 26 : const offload_buffer *grids[1] = {grid_ref};
379 26 : grid_integrate_task_list(task_list, compute_tau, natoms, nlevels,
380 : (const int(*)[3])npts_local, pab_blocks, grids,
381 : hab_blocks, forces_test, virial_test);
382 422 : for (int i = 0; i < n2; i++) {
383 11530 : for (int j = 0; j < n1; j++) {
384 11134 : hab_test[i][j] = hab_blocks->host_buffer[i * n1 + j];
385 : }
386 : }
387 : }
388 52 : end_time = omp_get_wtime();
389 52 : grid_free_basis_set(basisa);
390 52 : grid_free_basis_set(basisb);
391 52 : grid_free_task_list(task_list);
392 52 : offload_free_buffer(pab_blocks);
393 52 : offload_free_buffer(hab_blocks);
394 : } else {
395 52 : start_time = omp_get_wtime();
396 52 : if (collocate) {
397 : // collocate
398 26 : memset(grid_test->host_buffer, 0, npts_local_total * sizeof(double));
399 52 : for (int i = 0; i < cycles; i++) {
400 26 : grid_cpu_collocate_pgf_product(
401 : orthorhombic, border_mask, func, la_max, la_min, lb_max, lb_min,
402 : zeta, zetb, rscale, dh, dh_inv, ra, rab, npts_global, npts_local,
403 : shift_local, border_width, radius, o1, o2, n1, n2, pab,
404 26 : grid_test->host_buffer);
405 : }
406 : } else {
407 : // integrate
408 26 : memset(hab_test, 0, n2 * n1 * sizeof(double));
409 26 : memset(forces_test, 0, 2 * 3 * sizeof(double));
410 26 : double virials_test[2][3][3] = {0};
411 52 : for (int i = 0; i < cycles; i++) {
412 26 : grid_cpu_integrate_pgf_product(
413 : orthorhombic, compute_tau, border_mask, la_max, la_min, lb_max,
414 : lb_min, zeta, zetb, dh, dh_inv, ra, rab, npts_global, npts_local,
415 : shift_local, border_width, radius, o1, o2, n1, n2,
416 26 : grid_ref->host_buffer, hab_test, pab, forces_test, virials_test,
417 : NULL, NULL, NULL);
418 : }
419 104 : for (int i = 0; i < 3; i++) {
420 312 : for (int j = 0; j < 3; j++) {
421 234 : virial_test[i][j] = virials_test[0][i][j] + virials_test[1][i][j];
422 : }
423 : }
424 : }
425 52 : end_time = omp_get_wtime();
426 : }
427 :
428 104 : double max_value = 0.0;
429 104 : double max_rel_diff = 0.0;
430 104 : const double derivatives_precision = 1e-4; // account for higher numeric noise
431 104 : if (collocate) {
432 : // collocate
433 : // compare grid
434 7151464 : for (int i = 0; i < npts_local_total; i++) {
435 7151412 : const double ref_value = cycles * grid_ref->host_buffer[i];
436 7151412 : const double test_value = grid_test->host_buffer[i];
437 7151412 : const double diff = fabs(test_value - ref_value);
438 7151412 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
439 7151412 : max_rel_diff = fmax(max_rel_diff, rel_diff);
440 7151412 : max_value = fmax(max_value, fabs(test_value));
441 : }
442 : } else {
443 : // integrate
444 : // compare hab
445 844 : for (int i = 0; i < n2; i++) {
446 23060 : for (int j = 0; j < n1; j++) {
447 22268 : const double ref_value = cycles * hab_ref[i][j];
448 22268 : const double test_value = hab_test[i][j];
449 22268 : const double diff = fabs(test_value - ref_value);
450 22268 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
451 22268 : max_rel_diff = fmax(max_rel_diff, rel_diff);
452 22268 : max_value = fmax(max_value, fabs(test_value));
453 22268 : if (rel_diff > tolerance) {
454 0 : printf("hab[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n", i,
455 : j, ref_value, test_value, diff, rel_diff);
456 : }
457 : }
458 : }
459 : // compare forces
460 156 : for (int i = 0; i < 2; i++) {
461 416 : for (int j = 0; j < 3; j++) {
462 312 : const double ref_value = cycles * forces_ref[i][j];
463 312 : const double test_value = forces_test[i][j];
464 312 : const double diff = fabs(test_value - ref_value);
465 312 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
466 312 : max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
467 312 : if (rel_diff * derivatives_precision > tolerance) {
468 0 : printf("forces[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
469 : i, j, ref_value, test_value, diff, rel_diff);
470 : }
471 : }
472 : }
473 : // compare virial
474 208 : for (int i = 0; i < 3; i++) {
475 624 : for (int j = 0; j < 3; j++) {
476 468 : const double ref_value = cycles * virial_ref[i][j];
477 468 : const double test_value = virial_test[i][j];
478 468 : const double diff = fabs(test_value - ref_value);
479 468 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
480 468 : max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
481 468 : if (rel_diff * derivatives_precision > tolerance) {
482 0 : printf("virial[ %i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
483 : i, j, ref_value, test_value, diff, rel_diff);
484 : }
485 : }
486 : }
487 : }
488 104 : printf("Task: %-55s %9s %-7s Cycles: %e Max value: %le "
489 : "Max rel diff: %le Time: %le sec\n",
490 : filename, collocate ? "Collocate" : "Integrate",
491 104 : batch ? "Batched" : "PGF-CPU", (float)cycles, max_value, max_rel_diff,
492 : end_time - start_time);
493 :
494 104 : offload_free_buffer(grid_ref);
495 104 : offload_free_buffer(grid_test);
496 :
497 : // Check floating point exceptions.
498 104 : if (fetestexcept(FE_DIVBYZERO) != 0) {
499 0 : fprintf(stderr, "Error: Floating point exception FE_DIVBYZERO.\n");
500 0 : exit(1);
501 : }
502 104 : if (fetestexcept(FE_OVERFLOW) != 0) {
503 0 : fprintf(stderr, "Error: Floating point exception FE_OVERFLOW.\n");
504 0 : exit(1);
505 : }
506 :
507 104 : return max_rel_diff < tolerance;
508 : }
509 :
510 : // EOF
|