Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <fenv.h>
10 : #include <limits.h>
11 : #include <math.h>
12 : #include <omp.h>
13 : #include <stdarg.h>
14 : #include <stdbool.h>
15 : #include <stdio.h>
16 : #include <stdlib.h>
17 : #include <string.h>
18 :
19 : #include "../offload/offload_buffer.h"
20 : #include "common/grid_common.h"
21 : #include "grid_replay.h"
22 :
23 : #include "cpu/grid_cpu_collocate.h"
24 : #include "cpu/grid_cpu_integrate.h"
25 : #include "grid_task_list.h"
26 :
27 : /*******************************************************************************
28 : * \brief Reads next line from given filehandle and handles errors.
29 : * \author Ole Schuett
30 : ******************************************************************************/
31 655456 : static void read_next_line(char line[], int length, FILE *fp) {
32 1310912 : if (fgets(line, length, fp) == NULL) {
33 0 : fprintf(stderr, "Error: Could not read line.\n");
34 0 : abort();
35 : }
36 655456 : }
37 :
38 : /*******************************************************************************
39 : * \brief Parses next line from file, expecting it to match "${key} ${format}".
40 : * \author Ole Schuett
41 : ******************************************************************************/
42 655248 : static void parse_next_line(const char key[], FILE *fp, const char format[],
43 : const int nargs, ...) {
44 655248 : char line[100];
45 655248 : read_next_line(line, sizeof(line), fp);
46 :
47 655248 : char full_format[100];
48 655248 : strcpy(full_format, key);
49 655248 : strcat(full_format, " ");
50 655248 : strcat(full_format, format);
51 :
52 655248 : va_list varargs;
53 655248 : va_start(varargs, nargs);
54 655248 : if (vsscanf(line, full_format, varargs) != nargs) {
55 0 : fprintf(stderr, "Error: Could not parse line.\n");
56 0 : fprintf(stderr, "Line: %s\n", line);
57 0 : fprintf(stderr, "Format: %s\n", full_format);
58 0 : abort();
59 : }
60 655248 : va_end(varargs);
61 655248 : }
62 :
63 : /*******************************************************************************
64 : * \brief Shorthand for parsing a single integer value.
65 : * \author Ole Schuett
66 : ******************************************************************************/
67 1248 : static int parse_int(const char key[], FILE *fp) {
68 1248 : int value;
69 1248 : parse_next_line(key, fp, "%i", 1, &value);
70 1248 : return value;
71 : }
72 :
73 : /*******************************************************************************
74 : * \brief Shorthand for parsing a vector of three integer values.
75 : * \author Ole Schuett
76 : ******************************************************************************/
77 416 : static void parse_int3(const char key[], FILE *fp, int vec[3]) {
78 416 : parse_next_line(key, fp, "%i %i %i", 3, &vec[0], &vec[1], &vec[2]);
79 416 : }
80 :
81 : /*******************************************************************************
82 : * \brief Shorthand for parsing a single double value.
83 : * \author Ole Schuett
84 : ******************************************************************************/
85 416 : static double parse_double(const char key[], FILE *fp) {
86 416 : double value;
87 416 : parse_next_line(key, fp, "%le", 1, &value);
88 416 : return value;
89 : }
90 :
91 : /*******************************************************************************
92 : * \brief Shorthand for parsing a vector of three double values.
93 : * \author Ole Schuett
94 : ******************************************************************************/
95 416 : static void parse_double3(const char key[], FILE *fp, double vec[3]) {
96 416 : parse_next_line(key, fp, "%le %le %le", 3, &vec[0], &vec[1], &vec[2]);
97 416 : }
98 :
99 : /*******************************************************************************
100 : * \brief Shorthand for parsing a 3x3 matrix of doubles.
101 : * \author Ole Schuett
102 : ******************************************************************************/
103 312 : static void parse_double3x3(const char key[], FILE *fp, double mat[3][3]) {
104 312 : char format[100];
105 1248 : for (int i = 0; i < 3; i++) {
106 936 : sprintf(format, "%i %%le %%le %%le", i);
107 936 : parse_next_line(key, fp, format, 3, &mat[i][0], &mat[i][1], &mat[i][2]);
108 : }
109 312 : }
110 :
111 : /*******************************************************************************
112 : * \brief Creates mock basis set using the identity as decontraction matrix.
113 : * \author Ole Schuett
114 : ******************************************************************************/
115 104 : static void create_dummy_basis_set(const int size, const int lmin,
116 : const int lmax, const double zet,
117 104 : grid_basis_set **basis_set) {
118 :
119 104 : double sphi_mutable[size][size];
120 1788 : for (int i = 0; i < size; i++) {
121 47808 : for (int j = 0; j < size; j++) {
122 90564 : sphi_mutable[i][j] = (i == j) ? 1.0 : 0.0; // identity matrix
123 : }
124 : }
125 104 : const double(*sphi)[size] = (const double(*)[size])sphi_mutable;
126 :
127 104 : const int npgf = size / ncoset(lmax);
128 104 : assert(size == npgf * ncoset(lmax));
129 :
130 104 : const int first_sgf[1] = {1};
131 :
132 104 : double zet_array_mutable[1][npgf];
133 808 : for (int i = 0; i < npgf; i++) {
134 704 : zet_array_mutable[0][i] = zet;
135 : }
136 104 : const double(*zet_array)[npgf] = (const double(*)[npgf])zet_array_mutable;
137 :
138 104 : grid_create_basis_set(/*nset=*/1,
139 : /*nsgf=*/size,
140 : /*maxco=*/size,
141 : /*maxpgf=*/npgf,
142 : /*lmin=*/&lmin,
143 : /*lmax=*/&lmax,
144 : /*npgf=*/&npgf,
145 : /*nsgf_set=*/&size,
146 : /*first_sgf=*/first_sgf,
147 : /*sphi=*/sphi,
148 : /*zet=*/zet_array, basis_set);
149 104 : }
150 :
151 : /*******************************************************************************
152 : * \brief Creates mock task list with one task per cycle.
153 : * \author Ole Schuett
154 : ******************************************************************************/
155 52 : static void create_dummy_task_list(
156 : const bool orthorhombic, const int border_mask, const double ra[3],
157 : const double rab[3], const double radius, const grid_basis_set *basis_set_a,
158 : const grid_basis_set *basis_set_b, const int o1, const int o2,
159 : const int la_max, const int lb_max, const int cycles,
160 : const int cycles_per_block, const int npts_global[][3],
161 : const int npts_local[][3], const int shift_local[][3],
162 : const int border_width[][3], const double dh[][3][3],
163 52 : const double dh_inv[][3][3], grid_task_list **task_list) {
164 :
165 52 : const int ntasks = cycles;
166 52 : const int nlevels = 1;
167 52 : const int natoms = 2;
168 52 : const int nkinds = 2;
169 52 : int nblocks = cycles / cycles_per_block + 1;
170 :
171 : /* we can not have more blocks than the number of tasks */
172 52 : if (cycles == 1) {
173 52 : nblocks = 1;
174 : }
175 :
176 52 : int block_offsets[nblocks];
177 52 : memset(block_offsets, 0, nblocks * sizeof(int)); // all point to same data
178 52 : const double atom_positions[2][3] = {
179 52 : {ra[0], ra[1], ra[2]}, {rab[0] + ra[0], rab[1] + ra[1], rab[2] + ra[2]}};
180 52 : const int atom_kinds[2] = {1, 2};
181 52 : const grid_basis_set *basis_sets[2] = {basis_set_a, basis_set_b};
182 52 : const int ipgf = o1 / ncoset(la_max) + 1;
183 52 : const int jpgf = o2 / ncoset(lb_max) + 1;
184 52 : assert(o1 == (ipgf - 1) * ncoset(la_max));
185 52 : assert(o2 == (jpgf - 1) * ncoset(lb_max));
186 :
187 52 : int level_list[ntasks], iatom_list[ntasks], jatom_list[ntasks];
188 52 : int iset_list[ntasks], jset_list[ntasks], ipgf_list[ntasks],
189 52 : jpgf_list[ntasks];
190 52 : int border_mask_list[ntasks], block_num_list[ntasks];
191 52 : double radius_list[ntasks], rab_list_mutable[ntasks][3];
192 104 : for (int i = 0; i < cycles; i++) {
193 52 : level_list[i] = 1;
194 52 : iatom_list[i] = 1;
195 52 : jatom_list[i] = 2;
196 52 : iset_list[i] = 1;
197 52 : jset_list[i] = 1;
198 52 : ipgf_list[i] = ipgf;
199 52 : jpgf_list[i] = jpgf;
200 52 : border_mask_list[i] = border_mask;
201 52 : block_num_list[i] = i / cycles_per_block + 1;
202 52 : radius_list[i] = radius;
203 52 : rab_list_mutable[i][0] = rab[0];
204 52 : rab_list_mutable[i][1] = rab[1];
205 52 : rab_list_mutable[i][2] = rab[2];
206 : }
207 52 : const double(*rab_list)[3] = (const double(*)[3])rab_list_mutable;
208 :
209 52 : grid_create_task_list(
210 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
211 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
212 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
213 : block_num_list, radius_list, rab_list, npts_global, npts_local,
214 : shift_local, border_width, dh, dh_inv, task_list);
215 52 : }
216 :
217 : /*******************************************************************************
218 : * \brief Reads a .task file, collocates/integrates it, and compares results.
219 : * See grid_replay.h for details.
220 : * \author Ole Schuett
221 : ******************************************************************************/
222 104 : bool grid_replay(const char *filename, const int cycles, const bool collocate,
223 : const bool batch, const int cycles_per_block,
224 104 : const double tolerance) {
225 :
226 104 : if (cycles < 1) {
227 0 : fprintf(stderr, "Error: Cycles have to be greater than zero.\n");
228 0 : exit(1);
229 : }
230 :
231 104 : if (cycles_per_block < 1 || cycles_per_block > cycles) {
232 0 : fprintf(stderr,
233 : "Error: Cycles per block has to be between 1 and cycles.\n");
234 0 : exit(1);
235 : }
236 :
237 104 : FILE *fp = fopen(filename, "r");
238 104 : if (fp == NULL) {
239 0 : fprintf(stderr, "Could not open task file: %s\n", filename);
240 0 : exit(1);
241 : }
242 :
243 104 : char header_line[100];
244 104 : read_next_line(header_line, sizeof(header_line), fp);
245 104 : if (strcmp(header_line, "#Grid task v10\n") != 0) {
246 0 : fprintf(stderr, "Error: Wrong file header.\n");
247 0 : abort();
248 : }
249 :
250 104 : const bool orthorhombic = parse_int("orthorhombic", fp);
251 104 : const int border_mask = parse_int("border_mask", fp);
252 104 : const enum grid_func func = (enum grid_func)parse_int("func", fp);
253 104 : const bool compute_tau = (func == GRID_FUNC_DADB);
254 104 : const int la_max = parse_int("la_max", fp);
255 104 : const int la_min = parse_int("la_min", fp);
256 104 : const int lb_max = parse_int("lb_max", fp);
257 104 : const int lb_min = parse_int("lb_min", fp);
258 104 : const double zeta = parse_double("zeta", fp);
259 104 : const double zetb = parse_double("zetb", fp);
260 104 : const double rscale = parse_double("rscale", fp);
261 :
262 104 : double dh_mutable[3][3], dh_inv_mutable[3][3], ra[3], rab[3];
263 104 : parse_double3x3("dh", fp, dh_mutable);
264 104 : parse_double3x3("dh_inv", fp, dh_inv_mutable);
265 104 : parse_double3("ra", fp, ra);
266 104 : parse_double3("rab", fp, rab);
267 104 : const double(*dh)[3] = (const double(*)[3])dh_mutable;
268 104 : const double(*dh_inv)[3] = (const double(*)[3])dh_inv_mutable;
269 :
270 104 : int npts_global[3], npts_local[3], shift_local[3], border_width[3];
271 104 : parse_int3("npts_global", fp, npts_global);
272 104 : parse_int3("npts_local", fp, npts_local);
273 104 : parse_int3("shift_local", fp, shift_local);
274 104 : parse_int3("border_width", fp, border_width);
275 :
276 104 : const double radius = parse_double("radius", fp);
277 104 : const int o1 = parse_int("o1", fp);
278 104 : const int o2 = parse_int("o2", fp);
279 104 : const int n1 = parse_int("n1", fp);
280 104 : const int n2 = parse_int("n2", fp);
281 :
282 104 : double pab_mutable[n2][n1];
283 104 : char format[100];
284 1688 : for (int i = 0; i < n2; i++) {
285 46120 : for (int j = 0; j < n1; j++) {
286 44536 : sprintf(format, "%i %i %%le", i, j);
287 44536 : parse_next_line("pab", fp, format, 1, &pab_mutable[i][j]);
288 : }
289 : }
290 104 : const double(*pab)[n1] = (const double(*)[n1])pab_mutable;
291 :
292 104 : const int npts_local_total = npts_local[0] * npts_local[1] * npts_local[2];
293 104 : offload_buffer *grid_ref = NULL;
294 104 : offload_create_buffer(npts_local_total, &grid_ref);
295 104 : memset(grid_ref->host_buffer, 0, npts_local_total * sizeof(double));
296 :
297 104 : const int ngrid_nonzero = parse_int("ngrid_nonzero", fp);
298 578328 : for (int n = 0; n < ngrid_nonzero; n++) {
299 578224 : int i, j, k;
300 578224 : double value;
301 578224 : parse_next_line("grid", fp, "%i %i %i %le", 4, &i, &j, &k, &value);
302 578224 : grid_ref->host_buffer[k * npts_local[1] * npts_local[0] +
303 578224 : j * npts_local[0] + i] = value;
304 : }
305 :
306 104 : double hab_ref[n2][n1];
307 104 : memset(hab_ref, 0, n2 * n1 * sizeof(double));
308 896 : for (int i = o2; i < ncoset(lb_max) + o2; i++) {
309 29848 : for (int j = o1; j < ncoset(la_max) + o1; j++) {
310 29056 : sprintf(format, "%i %i %%le", i, j);
311 29056 : parse_next_line("hab", fp, format, 1, &hab_ref[i][j]);
312 : }
313 : }
314 :
315 104 : double forces_ref[2][3];
316 104 : parse_double3("force_a", fp, forces_ref[0]);
317 104 : parse_double3("force_b", fp, forces_ref[1]);
318 :
319 104 : double virial_ref[3][3];
320 104 : parse_double3x3("virial", fp, virial_ref);
321 :
322 104 : char footer_line[100];
323 104 : read_next_line(footer_line, sizeof(footer_line), fp);
324 104 : if (strcmp(footer_line, "#THE_END\n") != 0) {
325 0 : fprintf(stderr, "Error: Wrong footer line.\n");
326 0 : abort();
327 : }
328 :
329 104 : if (fclose(fp) != 0) {
330 0 : fprintf(stderr, "Could not close task file: %s\n", filename);
331 0 : abort();
332 : }
333 :
334 104 : offload_buffer *grid_test = NULL;
335 104 : offload_create_buffer(npts_local_total, &grid_test);
336 104 : double hab_test[n2][n1];
337 104 : double forces_test[2][3];
338 104 : double virial_test[3][3];
339 104 : double start_time, end_time;
340 :
341 104 : if (batch) {
342 52 : grid_basis_set *basisa = NULL, *basisb = NULL;
343 52 : create_dummy_basis_set(n1, la_min, la_max, zeta, &basisa);
344 52 : create_dummy_basis_set(n2, lb_min, lb_max, zetb, &basisb);
345 52 : grid_task_list *task_list = NULL;
346 52 : create_dummy_task_list(
347 : orthorhombic, border_mask, ra, rab, radius, basisa, basisb, o1, o2,
348 : la_max, lb_max, cycles, cycles_per_block, (const int(*)[3])npts_global,
349 : (const int(*)[3])npts_local, (const int(*)[3])shift_local,
350 : (const int(*)[3])border_width, (const double(*)[3][3])dh,
351 : (const double(*)[3][3])dh_inv, &task_list);
352 52 : offload_buffer *pab_blocks = NULL, *hab_blocks = NULL;
353 52 : offload_create_buffer(n1 * n2, &pab_blocks);
354 52 : offload_create_buffer(n1 * n2, &hab_blocks);
355 52 : const double f = (collocate) ? rscale : 1.0;
356 944 : for (int i = 0; i < n1; i++) {
357 23160 : for (int j = 0; j < n2; j++) {
358 22268 : pab_blocks->host_buffer[j * n1 + i] = 0.5 * f * pab[j][i];
359 : }
360 : }
361 52 : start_time = omp_get_wtime();
362 52 : const int nlevels = 1;
363 52 : const int natoms = 2;
364 52 : if (collocate) {
365 : // collocate
366 26 : offload_buffer *grids[1] = {grid_test};
367 26 : grid_collocate_task_list(task_list, func, nlevels,
368 : (const int(*)[3])npts_local, pab_blocks, grids);
369 : } else {
370 : // integrate
371 26 : const offload_buffer *grids[1] = {grid_ref};
372 26 : grid_integrate_task_list(task_list, compute_tau, natoms, nlevels,
373 : (const int(*)[3])npts_local, pab_blocks, grids,
374 : hab_blocks, forces_test, virial_test);
375 422 : for (int i = 0; i < n2; i++) {
376 11530 : for (int j = 0; j < n1; j++) {
377 11134 : hab_test[i][j] = hab_blocks->host_buffer[i * n1 + j];
378 : }
379 : }
380 : }
381 52 : end_time = omp_get_wtime();
382 52 : grid_free_basis_set(basisa);
383 52 : grid_free_basis_set(basisb);
384 52 : grid_free_task_list(task_list);
385 52 : offload_free_buffer(pab_blocks);
386 52 : offload_free_buffer(hab_blocks);
387 : } else {
388 52 : start_time = omp_get_wtime();
389 52 : if (collocate) {
390 : // collocate
391 26 : memset(grid_test->host_buffer, 0, npts_local_total * sizeof(double));
392 52 : for (int i = 0; i < cycles; i++) {
393 26 : grid_cpu_collocate_pgf_product(
394 : orthorhombic, border_mask, func, la_max, la_min, lb_max, lb_min,
395 : zeta, zetb, rscale, dh, dh_inv, ra, rab, npts_global, npts_local,
396 : shift_local, border_width, radius, o1, o2, n1, n2, pab,
397 26 : grid_test->host_buffer);
398 : }
399 : } else {
400 : // integrate
401 26 : memset(hab_test, 0, n2 * n1 * sizeof(double));
402 26 : memset(forces_test, 0, 2 * 3 * sizeof(double));
403 26 : double virials_test[2][3][3] = {0};
404 52 : for (int i = 0; i < cycles; i++) {
405 26 : grid_cpu_integrate_pgf_product(
406 : orthorhombic, compute_tau, border_mask, la_max, la_min, lb_max,
407 : lb_min, zeta, zetb, dh, dh_inv, ra, rab, npts_global, npts_local,
408 : shift_local, border_width, radius, o1, o2, n1, n2,
409 26 : grid_ref->host_buffer, hab_test, pab, forces_test, virials_test,
410 : NULL, NULL, NULL);
411 : }
412 104 : for (int i = 0; i < 3; i++) {
413 312 : for (int j = 0; j < 3; j++) {
414 234 : virial_test[i][j] = virials_test[0][i][j] + virials_test[1][i][j];
415 : }
416 : }
417 : }
418 52 : end_time = omp_get_wtime();
419 : }
420 :
421 104 : double max_value = 0.0;
422 104 : double max_rel_diff = 0.0;
423 104 : const double derivatives_precision = 1e-4; // account for higher numeric noise
424 104 : if (collocate) {
425 : // collocate
426 : // compare grid
427 7151464 : for (int i = 0; i < npts_local_total; i++) {
428 7151412 : const double ref_value = cycles * grid_ref->host_buffer[i];
429 7151412 : const double test_value = grid_test->host_buffer[i];
430 7151412 : const double diff = fabs(test_value - ref_value);
431 7151412 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
432 7151412 : max_rel_diff = fmax(max_rel_diff, rel_diff);
433 7151412 : max_value = fmax(max_value, fabs(test_value));
434 : }
435 : } else {
436 : // integrate
437 : // compare hab
438 844 : for (int i = 0; i < n2; i++) {
439 23060 : for (int j = 0; j < n1; j++) {
440 22268 : const double ref_value = cycles * hab_ref[i][j];
441 22268 : const double test_value = hab_test[i][j];
442 22268 : const double diff = fabs(test_value - ref_value);
443 22268 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
444 22268 : max_rel_diff = fmax(max_rel_diff, rel_diff);
445 22268 : max_value = fmax(max_value, fabs(test_value));
446 22268 : if (rel_diff > tolerance) {
447 0 : printf("hab[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n", i,
448 : j, ref_value, test_value, diff, rel_diff);
449 : }
450 : }
451 : }
452 : // compare forces
453 156 : for (int i = 0; i < 2; i++) {
454 416 : for (int j = 0; j < 3; j++) {
455 312 : const double ref_value = cycles * forces_ref[i][j];
456 312 : const double test_value = forces_test[i][j];
457 312 : const double diff = fabs(test_value - ref_value);
458 312 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
459 312 : max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
460 312 : if (rel_diff * derivatives_precision > tolerance) {
461 0 : printf("forces[%i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
462 : i, j, ref_value, test_value, diff, rel_diff);
463 : }
464 : }
465 : }
466 : // compare virial
467 208 : for (int i = 0; i < 3; i++) {
468 624 : for (int j = 0; j < 3; j++) {
469 468 : const double ref_value = cycles * virial_ref[i][j];
470 468 : const double test_value = virial_test[i][j];
471 468 : const double diff = fabs(test_value - ref_value);
472 468 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
473 468 : max_rel_diff = fmax(max_rel_diff, rel_diff * derivatives_precision);
474 468 : if (rel_diff * derivatives_precision > tolerance) {
475 0 : printf("virial[ %i, %i] ref: %le test: %le diff:%le rel_diff: %le\n",
476 : i, j, ref_value, test_value, diff, rel_diff);
477 : }
478 : }
479 : }
480 : }
481 104 : printf("Task: %-55s %9s %-7s Cycles: %e Max value: %le "
482 : "Max rel diff: %le Time: %le sec\n",
483 : filename, collocate ? "Collocate" : "Integrate",
484 104 : batch ? "Batched" : "PGF-CPU", (float)cycles, max_value, max_rel_diff,
485 : end_time - start_time);
486 :
487 104 : offload_free_buffer(grid_ref);
488 104 : offload_free_buffer(grid_test);
489 :
490 : // Check floating point exceptions.
491 104 : if (fetestexcept(FE_DIVBYZERO) != 0) {
492 0 : fprintf(stderr, "Error: Floating point exception FE_DIVBYZERO.\n");
493 0 : exit(1);
494 : }
495 104 : if (fetestexcept(FE_OVERFLOW) != 0) {
496 0 : fprintf(stderr, "Error: Floating point exception FE_OVERFLOW.\n");
497 0 : exit(1);
498 : }
499 :
500 104 : return max_rel_diff < tolerance;
501 : }
502 :
503 : // EOF
|